From 6e68704fea3b5cd0b0e43d3dcff2c90fc198c479 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 9 Feb 2016 18:48:19 -0800 Subject: [PATCH 01/49] make js lib alias logic a little better --- src/modules.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/modules.js b/src/modules.js index 8f14ab680a337..13efa7217f5bf 100644 --- a/src/modules.js +++ b/src/modules.js @@ -164,9 +164,10 @@ var LibraryManager = { if (typeof lib[x] === 'string') { var target = x; while (typeof lib[target] === 'string') { - if (lib[target].indexOf('(') >= 0) continue libloop; + // ignore code, aliases are just simple names + if (lib[target].search(/[({; ]/) >= 0) continue libloop; + // ignore trivial pass-throughs to Math.* if (lib[target].indexOf('Math_') == 0) continue libloop; - if (lib[target].indexOf(';') > 0) continue libloop; // ignore code target = lib[target]; } if (lib[target + '__asm']) continue; // This is an alias of an asm library function. Also needs to be fully optimized. 
From 860e55e23e6958f64c0eaea584d26bc912902e1f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Feb 2016 14:11:25 -0800 Subject: [PATCH 02/49] remove old WASM option --- emcc.py | 11 -------- src/settings.js | 6 ----- tests/test_browser.py | 61 ------------------------------------------- tests/test_core.py | 1 - tests/test_other.py | 27 ------------------- tools/js_optimizer.py | 3 --- 6 files changed, 109 deletions(-) diff --git a/emcc.py b/emcc.py index 16ad6ac12f682..6d7dbf01590fc 100755 --- a/emcc.py +++ b/emcc.py @@ -955,9 +955,6 @@ def check(input_file): elif shared.Settings.SIDE_MODULE: assert not shared.Settings.MAIN_MODULE memory_init_file = False # memory init file is not supported with side modules, must be executable synchronously (for dlopen) - if shared.Settings.WASM: - logging.warning('disabling WASM in SIDE_MODULE') - shared.Settings.WASM = 0 if shared.Settings.MAIN_MODULE or shared.Settings.SIDE_MODULE: assert shared.Settings.ASM_JS, 'module linking requires asm.js output (-s ASM_JS=1)' @@ -972,9 +969,6 @@ def check(input_file): logging.warning('not all asm.js optimizations are possible with ALLOW_MEMORY_GROWTH, disabling those') shared.Settings.ASM_JS = 2 # memory growth does not validate as asm.js http://discourse.wicg.io/t/request-for-comments-switching-resizing-heaps-in-asm-js/641/23 - if shared.Settings.WASM: - assert not shared.Settings.ALLOW_MEMORY_GROWTH, 'memory growth is not supported with WASM=1' - if shared.Settings.EMULATE_FUNCTION_POINTER_CASTS: shared.Settings.ALIASING_FUNCTION_POINTERS = 0 @@ -1905,11 +1899,6 @@ def un_src(): # use this if you want to modify the script and need it to be inli worker_target_basename = target_basename + '.worker' open(target, 'w').write(open(shared.path_from_root('src', 'webGLClient.js')).read() + '\n' + open(shared.path_from_root('src', 'proxyClient.js')).read().replace('{{{ filename }}}', shared.Settings.PROXY_TO_WORKER_FILENAME or worker_target_basename).replace('{{{ IDBStore.js }}}', 
open(shared.path_from_root('src', 'IDBStore.js')).read())) - if shared.Settings.WASM: - logging.debug('converting to WebAssembly') - wasm_target = unsuffixed(js_target) + '.wasm' - subprocess.check_call([shared.PYTHON, shared.path_from_root('third_party', 'wasm-polyfill', 'wasmator.py'), js_target, wasm_target, shared.Settings.EXPORT_NAME]) - log_time('final emitting') if DEBUG: logging.debug('total time: %.2f seconds', (time.time() - start_time)) diff --git a/src/settings.js b/src/settings.js index 8eb8a6e77dda8..674d6f226f0d6 100644 --- a/src/settings.js +++ b/src/settings.js @@ -629,12 +629,6 @@ var WASM_BACKEND = 0; // Whether to use the WebAssembly backend that is in devel // This requires that BINARYEN be set, as we use Binaryen's s2wasm to // translate the backend output. -var WASM = 0; // Older WebAssembly experiment. Compress the asm.js module into an early proposal for WebAssembly, - // and ship a decompressor that runs on the client. - // Note that wasm loading is asynchronous in the browser, and for that reason we wrap the entire emitted - // code in a function - things will not reach the global scope by default. You can access things on the - // Module object. - // Ports var USE_SDL = 1; // Specify the SDL version that is being linked against. 
diff --git a/tests/test_browser.py b/tests/test_browser.py index 044fe57c95e0f..750c808bad229 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -2957,67 +2957,6 @@ def test_meminit_big(self): self.btest(d, expected='0', args=args + ["--closure", "0", "-g"]) self.btest(d, expected='0', args=args + ["--closure", "1"]) - def test_wasm_polyfill_prototype(self): - self.clear() - open('main.cpp', 'w').write(self.with_report_result(r''' - #include - int main() { - std::cout << "Hello!\n"; - int result = 7; - REPORT_RESULT(); - return 0; - } - ''')) - def separate(): - print '*** verify that running the wasmator after emcc works' - Popen([PYTHON, EMCC, 'main.cpp', '-O2', '-o', 'test.o.html']).communicate() - subprocess.check_call([PYTHON, path_from_root('third_party', 'wasm-polyfill', 'wasmator.py'), 'test.o.js', 'test.o.wasm', 'Module']) - def together(): - print '*** verify that running the wasmator using emcc -s WASM=1 works' - Popen([PYTHON, EMCC, 'main.cpp', '-O2', '-o', 'test.o.html', '-s', 'WASM=1']).communicate() - def together_worker(): - print '*** verify that running the wasmator using emcc -s WASM=1 works, running in a worker' - Popen([PYTHON, EMCC, 'main.cpp', '-O2', '-o', 'test.o.html', '-s', 'WASM=1', '--proxy-to-worker']).communicate() - for build, check_error in [ - (separate, True), - (together, True), - (together_worker, False) # onerror does not work in workers - ]: - build() - src = open('test.o.js').read() - open('test.o.js', 'w').write(''' - onerror = function() { - Module.print('fail!'); - var xhr = new XMLHttpRequest(); - xhr.open('GET', 'http://localhost:8888/report_result?99'); - xhr.onload = function() { - console.log('close!'); - window.close(); - }; - setTimeout(xhr.onload, 2000); - xhr.send(); - }; - - ''' + src) - print 'browser' - self.run_browser('test.o.html', None, '/report_result?7') - print 'shell' - self.do_run('', 'Hello!', no_build=True, basename='test') # test in the shell too - assert os.path.exists('test.o.wasm') 
- os.unlink('test.o.wasm') - if check_error: - print 'error verify' - self.run_browser('test.o.html', None, '/report_result?99') # without the wasm, we failz - print 'shell' - ok = False - try: - self.do_run('', 'Hello!', no_build=True, basename='test') # test in the shell too - ok = True - except: - pass - assert not ok - os.unlink('test.o.js') - def test_canvas_style_proxy(self): self.btest('canvas_style_proxy.c', expected='1', args=['--proxy-to-worker', '--shell-file', path_from_root('tests/canvas_style_proxy_shell.html'), '--pre-js', path_from_root('tests/canvas_style_proxy_pre.js')]) diff --git a/tests/test_core.py b/tests/test_core.py index 0cf3327dc275f..e2aaf60dd2eb4 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -7895,7 +7895,6 @@ def setUp(self): asm2g = make_run("asm2g", compiler=CLANG, emcc_args=["-O2", "-g", "-s", "ASSERTIONS=1", "-s", "SAFE_HEAP=1"]) asm2i = make_run("asm2i", compiler=CLANG, emcc_args=["-O2", '-s', 'EMTERPRETIFY=1']) #asm2m = make_run("asm2m", compiler=CLANG, emcc_args=["-O2", "--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2", "-s", "ASSERTIONS=1"]) -#asm2w = make_run("asm2w", compiler=CLANG, emcc_args=["-O2", "-s", "WASM=1"]) #binaryen = make_run("binaryen", compiler=CLANG, emcc_args=["-s", "BINARYEN='..path..'"]) #normalyen = make_run("normalyen", compiler=CLANG, emcc_args=['-s', 'GLOBAL_BASE=1024']) # useful comparison to binaryen diff --git a/tests/test_other.py b/tests/test_other.py index 63e31595b47d3..22e15e6267eca 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -5320,33 +5320,6 @@ def test_emcc_dev_null(self): assert proc.returncode == 0 self.assertContained('#define __EMSCRIPTEN__ 1', out) # all our defines should show up - def test_emcc_wasm_0(self): - default_error_message = 'cannot use WASM=1 when full asm.js validation was disabled' - for args, ok, error_message in [ - ([], False, ''), - (['-O1'], True, ''), - (['-O2'], True, ''), - (['-O3'], True, ''), - (['-O2', '-g'], True, ''), - (['-s', 
'ASM_JS=1'], True, ''), - (['-s', 'WASM=0'], True, ''), - (['-s', 'WASM=1'], False, ''), - (['-s', 'ALLOW_MEMORY_GROWTH=1'], False, 'memory growth is not supported with WASM=1'), - (['-s', 'ALLOW_MEMORY_GROWTH=1', '-O1'], False, 'memory growth is not supported with WASM=1'), - (['-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-s', 'EMTERPRETIFY_WHITELIST=["_main"]', '-O2', '-s', 'ASSERTIONS=1'], True, ''), - ]: - print 'emcc_wasm_0', args, ok - if not error_message: error_message = default_error_message - proc = Popen([PYTHON, EMCC, path_from_root('tests', 'hello_world.c'), '-s', 'WASM=1'] + args, stdout=PIPE, stderr=PIPE) - out, err = proc.communicate() - if ok: - assert proc.returncode == 0 - self.assertNotContained(error_message, err) - self.assertContained('hello, world!', run_js('a.out.js')) - else: - assert proc.returncode != 0 - self.assertContained(error_message, err) - def test_umask_0(self): open('src.c', 'w').write(r''' #include diff --git a/tools/js_optimizer.py b/tools/js_optimizer.py index 45b4836270d71..be42d06bb5caf 100644 --- a/tools/js_optimizer.py +++ b/tools/js_optimizer.py @@ -422,9 +422,6 @@ def write_chunk(chunk, i): else: filenames = [] - if shared.Settings.WASM: - passes = filter(lambda p: p != 'minifyWhitespace', passes) # if we are going to wasmify the asm module, no need to minify it before hand - if len(filenames) > 0: if not use_native(passes, source_map) or not get_native_optimizer(): commands = map(lambda filename: js_engine + From 80cb1fdc9ac29235d340eccab3d839788d922e3c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Feb 2016 14:17:43 -0800 Subject: [PATCH 03/49] be more tolerant in other.test_no_nuthin, as LLVM builds of various sorts emit different code sizes --- tests/test_other.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_other.py b/tests/test_other.py index 22e15e6267eca..8b651e56ef076 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -4202,7 +4202,7 @@ def 
test_no_filesystem(self): assert FS_MARKER not in open('a.out.js').read() print 'yes fs, no fs:', yes_size, no_size assert yes_size - no_size > 100000 # 100K of FS code is removed - assert no_size < 315000 + assert no_size < 360000 def test_no_nuthin(self): print 'part one: check NO_FILESYSTEM is automatically set, and effective' @@ -4229,7 +4229,7 @@ def do(name, source, moar_opts): assert sizes['no_nuthin'] < 0.975*sizes['no_fs'] assert sizes['no_fs_manual'] < sizes['no_fs'] # manual can remove a tiny bit more assert sizes['no_fs'] < 1.02*sizes['no_fs_manual'] - test([], 0.75, 320000) + test([], 0.75, 360000) test(['-O1'], 0.66, 210000) test(['-O2'], 0.50, 70000) test(['-O3', '--closure', '1'], 0.60, 50000) @@ -4254,8 +4254,8 @@ def do(name, moar_opts): assert sizes['no_nuthin'] < absolute if '--closure' in opts: # no EXPORTED_RUNTIME_METHODS makes closure much more effective assert sizes['no_nuthin'] < 0.975*sizes['normal'] - test([], 1, 200000) - test(['-O1'], 1, 200000) + test([], 1, 220000) + test(['-O1'], 1, 215000) test(['-O2'], 0.99, 75000) test(['-O3', '--closure', '1'], 0.975, 50000) test(['-O3', '--closure', '2'], 0.975, 41000) # might change now and then From bf4296f8e18d2a56dc6aa56a68b6e81499271380 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Feb 2016 15:34:18 -0800 Subject: [PATCH 04/49] when NO_FILESYSTEM and ASSERTIONS, make a small fake FS object to catch runtime access and show a clear error message --- src/preamble.js | 23 +++++++++++++++++++++++ tests/test_other.py | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/src/preamble.js b/src/preamble.js index 391ef3d56f6fb..0c0f943d14d74 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -1826,4 +1826,27 @@ if (!ENVIRONMENT_IS_PTHREAD) addOnPreRun(function() { if (!ENVIRONMENT_IS_PTHREAD) addOnPreRun(function() { if (typeof SharedArrayBuffer !== 'undefined') { addRunDependency('pthreads'); PThread.allocateUnusedWorkers({{{PTHREAD_POOL_SIZE}}}, 
function() { removeRunDependency('pthreads'); }); }}); #endif +#if ASSERTIONS +#if NO_FILESYSTEM +var /* show errors on likely calls to FS when it was not included */ FS = { + error: function() { + abort('Filesystem support (FS) was not included. The problem is that you are using files from JS, but files were not used from C/C++, so filesystem support was not auto-included. You can force-include filesystem support with -s FORCE_FILESYSTEM=1'); + }, + init: function() { FS.error() }, + createDataFile: function() { FS.error() }, + createPreloadedFile: function() { FS.error() }, + createLazyFile: function() { FS.error() }, + open: function() { FS.error() }, + mkdev: function() { FS.error() }, + registerDevice: function() { FS.error() }, + analyzePath: function() { FS.error() }, + loadFilesFromDB: function() { FS.error() }, + + ErrnoError: function ErrnoError() { FS.error() }, +}; +Module['FS_createDataFile'] = FS.createDataFile; +Module['FS_createPreloadedFile'] = FS.createPreloadedFile; +#endif +#endif + // === Body === diff --git a/tests/test_other.py b/tests/test_other.py index 8b651e56ef076..7b46b710ca11c 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -6015,3 +6015,43 @@ def test_override_environment(self): assert ('environment is %s? false' % other) in seen, seen print '-- verified proper env is shown' + def test_warn_no_filesystem(self): + WARNING = 'Filesystem support (FS) was not included. The problem is that you are using files from JS, but files were not used from C/C++, so filesystem support was not auto-included. 
You can force-include filesystem support with -s FORCE_FILESYSTEM=1' + + check_execute([PYTHON, EMCC, path_from_root('tests', 'hello_world.c')]) + seen = run_js('a.out.js', stderr=PIPE) + assert WARNING not in seen + + def test(contents): + open('src.cpp', 'w').write(r''' + #include + #include + int main() { + EM_ASM({ %s }); + printf("hello, world!\n"); + return 0; + } + ''' % contents) + check_execute([PYTHON, EMCC, 'src.cpp']) + self.assertContained(WARNING, run_js('a.out.js', stderr=PIPE, assert_returncode=None)) + + # might appear in handwritten code + test("FS.init()") + test("FS.createPreloadedFile('waka waka, just warning check')"); + test("FS.createDataFile('waka waka, just warning check')"); + test("FS.analyzePath('waka waka, just warning check')"); + test("FS.loadFilesFromDB('waka waka, just warning check')"); + # might appear in filesystem code from a separate script tag + test("Module['FS_createDataFile']('waka waka, just warning check')"); + test("Module['FS_createPreloadedFile']('waka waka, just warning check')"); + + # text is in the source when needed, but when forcing FS, it isn't there + check_execute([PYTHON, EMCC, 'src.cpp']) + self.assertContained(WARNING, open('a.out.js').read()) + check_execute([PYTHON, EMCC, 'src.cpp', '-s', 'FORCE_FILESYSTEM=1']) # forcing FS means no need + self.assertNotContained(WARNING, open('a.out.js').read()) + check_execute([PYTHON, EMCC, 'src.cpp', '-s', 'ASSERTIONS=0']) # no assertions, no need + self.assertNotContained(WARNING, open('a.out.js').read()) + check_execute([PYTHON, EMCC, 'src.cpp', '-O2']) # optimized, so no assertions + self.assertNotContained(WARNING, open('a.out.js').read()) + From b54d09515175b98e7e574bfcb5af5edaf4b905c6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Feb 2016 15:57:15 -0800 Subject: [PATCH 05/49] use demangle support in browser.test_emscripten_log --- tests/test_browser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_browser.py 
b/tests/test_browser.py index 750c808bad229..37c33b48b406e 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -108,7 +108,7 @@ def test_emscripten_log(self): src = os.path.join(self.get_dir(), 'src.cpp') open(src, 'w').write(self.with_report_result(open(path_from_root('tests', 'emscripten_log', 'emscripten_log.cpp')).read())) - Popen([PYTHON, EMCC, src, '--pre-js', path_from_root('src', 'emscripten-source-map.min.js'), '-g', '-o', 'page.html']).communicate() + Popen([PYTHON, EMCC, src, '--pre-js', path_from_root('src', 'emscripten-source-map.min.js'), '-g', '-o', 'page.html', '-s', 'DEMANGLE_SUPPORT=1']).communicate() self.run_browser('page.html', None, '/report_result?1') def build_native_lzma(self): From 23d8e13ede67be0708a64f4bc0cb47c13f8829ba Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Feb 2016 16:03:17 -0800 Subject: [PATCH 06/49] if we include files, we definitely need filesystem support --- emcc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/emcc.py b/emcc.py index 6d7dbf01590fc..1912e2b5da80e 100755 --- a/emcc.py +++ b/emcc.py @@ -1040,8 +1040,9 @@ def check(input_file): if proxy_to_worker: shared.Settings.PROXY_TO_WORKER = 1 - if use_preload_plugins: - shared.Settings.FORCE_FILESYSTEM = 1 # preload plugins require preload support which is part of the filesystem + if use_preload_plugins or len(preload_files) > 0 or len(embed_files) > 0: + # if we include any files, or intend to use preload plugins, then we definitely need filesystem support + shared.Settings.FORCE_FILESYSTEM = 1 if proxy_to_worker or use_preload_plugins: shared.Settings.DEFAULT_LIBRARY_FUNCS_TO_INCLUDE += ['$Browser'] From 8478ce2c3fed55c7ae5eb3a34d7c57d9ea9ad640 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Feb 2016 16:05:06 -0800 Subject: [PATCH 07/49] browser.test_file_db needs FORCE_FILESYSTEM --- tests/test_browser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_browser.py 
b/tests/test_browser.py index 37c33b48b406e..3fe7d89b2192b 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1010,7 +1010,7 @@ def test_file_db(self): open('moar.txt', 'w').write(secret) self.btest('file_db.cpp', '1', args=['--preload-file', 'moar.txt', '-DFIRST']) shutil.copyfile('test.html', 'first.html') - self.btest('file_db.cpp', secret) + self.btest('file_db.cpp', secret, args=['-s', 'FORCE_FILESYSTEM=1']) shutil.copyfile('test.html', 'second.html') open('moar.txt', 'w').write('aliantha') self.btest('file_db.cpp', secret, args=['--preload-file', 'moar.txt']) # even with a file there, we load over it From b8cf39d7c0b9d9870f895df6051909cdb5008cca Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Feb 2016 16:11:28 -0800 Subject: [PATCH 08/49] browser.test_locate_file needs FORCE_FILESYSTEM --- tests/test_browser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_browser.py b/tests/test_browser.py index 3fe7d89b2192b..afa0ee6dab80d 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -2158,8 +2158,8 @@ def test_locate_file(self): ''') - def in_html(expected): - Popen([PYTHON, EMCC, 'src.cpp', '-O2', '-g', '--shell-file', 'shell.html', '--pre-js', 'data.js', '-o', 'page.html']).communicate() + def in_html(expected, args=[]): + Popen([PYTHON, EMCC, 'src.cpp', '-O2', '-g', '--shell-file', 'shell.html', '--pre-js', 'data.js', '-o', 'page.html'] + args).communicate() self.run_browser('page.html', None, '/report_result?' + expected) in_html('1') @@ -2179,7 +2179,7 @@ def in_html(expected): } ''')) - in_html('200') + in_html('200', ['-s', 'FORCE_FILESYSTEM=1']) def test_glfw3(self): self.btest(path_from_root('tests', 'glfw3.c'), args=['-s', 'LEGACY_GL_EMULATION=1', '-s', 'USE_GLFW=3'], expected='1') From ffd536431815b521dada0e2ef14aec711660d642 Mon Sep 17 00:00:00 2001 From: Tanner Rogalsky Date: Wed, 10 Feb 2016 20:25:23 -0500 Subject: [PATCH 09/49] A single float is passed. 
`values` is a typo and the access of `value` in this function throws an error. --- src/library_openal.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/library_openal.js b/src/library_openal.js index b7422adc7f9c0..80459f77317bd 100644 --- a/src/library_openal.js +++ b/src/library_openal.js @@ -1169,7 +1169,7 @@ var LibraryOpenAL = { } }, - alGetListenerf: function(pname, values) { + alGetListenerf: function(pname, value) { if (!AL.currentContext) { #if OPENAL_DEBUG console.error("alGetListenerf called without a valid context"); From dc113eb672bc3f336fa7b07fd54eaaa4e6b5b3f3 Mon Sep 17 00:00:00 2001 From: Tanner Rogalsky Date: Wed, 10 Feb 2016 20:26:43 -0500 Subject: [PATCH 10/49] Replacing the AudioParam with a float is incorrect. The `value` field on the AudioParam should be set instead. --- src/library_openal.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/library_openal.js b/src/library_openal.js index 80459f77317bd..80d07557a9d4d 100644 --- a/src/library_openal.js +++ b/src/library_openal.js @@ -1178,7 +1178,7 @@ var LibraryOpenAL = { } switch (pname) { case 0x100A /* AL_GAIN */: - {{{ makeSetValue('value', '0', 'AL.currentContext.gain.gain', 'float') }}} + {{{ makeSetValue('value', '0', 'AL.currentContext.gain.gain.value', 'float') }}} break; default: #if OPENAL_DEBUG From 6898c4bdd8db4989cbe9d7480913c219d6fb74e6 Mon Sep 17 00:00:00 2001 From: Tanner Rogalsky Date: Wed, 10 Feb 2016 20:27:31 -0500 Subject: [PATCH 11/49] Sources should respect the global gain setting, regardless of if they've been set to be relative or not. --- src/library_openal.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/library_openal.js b/src/library_openal.js index 80d07557a9d4d..8f1b3bf46ba46 100644 --- a/src/library_openal.js +++ b/src/library_openal.js @@ -472,7 +472,7 @@ var LibraryOpenAL = { // Disconnect from the panner. 
src.gain.disconnect(); - src.gain.connect(AL.currentContext.ctx.destination); + src.gain.connect(AL.currentContext.gain); } } else if (value === 0 /* AL_FALSE */) { if (!src.panner) { @@ -484,7 +484,7 @@ var LibraryOpenAL = { panner.rolloffFactor = src.rolloffFactor; panner.setPosition(src.position[0], src.position[1], src.position[2]); panner.setVelocity(src.velocity[0], src.velocity[1], src.velocity[2]); - panner.connect(AL.currentContext.ctx.destination); + panner.connect(AL.currentContext.gain); // Disconnect from the default source. src.gain.disconnect(); From d614b5c0a40498e6514c4c1300cbf76c38d3203b Mon Sep 17 00:00:00 2001 From: Tanner Rogalsky Date: Wed, 10 Feb 2016 20:28:21 -0500 Subject: [PATCH 12/49] Add Tanner Rogalsky to the AUTHORS file. --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 46084c30039d4..3e5e0750950cb 100644 --- a/AUTHORS +++ b/AUTHORS @@ -231,4 +231,5 @@ a license to everyone to use it as detailed in LICENSE.) * Noam T.Cohen * Nick Shin * Gregg Tavares +* Tanner Rogalsky From 74e56381ee2d7cd711e88f08def3d714b246b554 Mon Sep 17 00:00:00 2001 From: Tanner Rogalsky Date: Thu, 11 Feb 2016 11:22:59 -0500 Subject: [PATCH 13/49] Setting the global gain value should not replace the GainNode but set its value instead. --- src/library_openal.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/library_openal.js b/src/library_openal.js index 8f1b3bf46ba46..9e24ede6a9c08 100644 --- a/src/library_openal.js +++ b/src/library_openal.js @@ -1254,7 +1254,7 @@ var LibraryOpenAL = { } switch (param) { case 0x100A /* AL_GAIN */: - AL.currentContext.gain.value = value; + AL.currentContext.gain.gain.value = value; break; default: #if OPENAL_DEBUG From 4a99cc43cbf6873e909030a0d4ff5b403a6f8ecc Mon Sep 17 00:00:00 2001 From: Tanner Rogalsky Date: Thu, 11 Feb 2016 11:29:34 -0500 Subject: [PATCH 14/49] Add tests for `alGetListenerf` and `alListenerf`. 
--- tests/openal_playback.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/openal_playback.cpp b/tests/openal_playback.cpp index 46c4f8a365bbf..116ed004d28c0 100644 --- a/tests/openal_playback.cpp +++ b/tests/openal_playback.cpp @@ -59,6 +59,16 @@ int main() { alListenerfv(AL_VELOCITY, listenerVel); alListenerfv(AL_ORIENTATION, listenerOri); + // check getting and setting global gain + ALfloat volume; + alGetListenerf(AL_GAIN, &volume); + assert(volume == 1.0); + alListenerf(AL_GAIN, 0.0); + alGetListenerf(AL_GAIN, &volume); + assert(volume == 0.0); + + alListenerf(AL_GAIN, 1.0); // reset gain to default + ALuint buffers[1]; alGenBuffers(1, buffers); From e9d06af51a9477fdb3d2165c6bcdad51f5ca533f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 11 Feb 2016 10:33:56 -0800 Subject: [PATCH 15/49] remove some spurious deps in library_openal.js --- src/library_openal.js | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/library_openal.js b/src/library_openal.js index 8f1b3bf46ba46..4e66aa861bf39 100644 --- a/src/library_openal.js +++ b/src/library_openal.js @@ -860,7 +860,6 @@ var LibraryOpenAL = { } }, - alSourcePlay__deps: ['setSourceState'], alSourcePlay: function(source) { if (!AL.currentContext) { #if OPENAL_DEBUG @@ -879,7 +878,6 @@ var LibraryOpenAL = { AL.setSourceState(src, 0x1012 /* AL_PLAYING */); }, - alSourceStop__deps: ['setSourceState'], alSourceStop: function(source) { if (!AL.currentContext) { #if OPENAL_DEBUG @@ -898,7 +896,6 @@ var LibraryOpenAL = { AL.setSourceState(src, 0x1014 /* AL_STOPPED */); }, - alSourceRewind__deps: ['setSourceState'], alSourceRewind: function(source) { if (!AL.currentContext) { #if OPENAL_DEBUG @@ -920,7 +917,6 @@ var LibraryOpenAL = { AL.setSourceState(src, 0x1011 /* AL_INITIAL */); }, - alSourcePause__deps: ['setSourceState'], alSourcePause: function(source) { if (!AL.currentContext) { #if OPENAL_DEBUG From 6d41ea20fedf8175326c04554301c3e664b5da9a Mon Sep 17 00:00:00 2001 From: Alon Zakai 
Date: Thu, 11 Feb 2016 11:39:53 -0800 Subject: [PATCH 16/49] fix interactive.test_html5_mouse --- tests/test_html5_mouse.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/test_html5_mouse.c b/tests/test_html5_mouse.c index 462cb9acda4de..5a9dd565e084f 100644 --- a/tests/test_html5_mouse.c +++ b/tests/test_html5_mouse.c @@ -69,12 +69,9 @@ EM_BOOL mouse_callback(int eventType, const EmscriptenMouseEvent *e, void *userD if (e->screenX != 0 && e->screenY != 0 && e->clientX != 0 && e->clientY != 0 && e->canvasX != 0 && e->canvasY != 0 && e->targetX != 0 && e->targetY != 0) { - if (e->buttons != 0) - { - if (eventType == EMSCRIPTEN_EVENT_CLICK) gotClick = 1; - if (eventType == EMSCRIPTEN_EVENT_MOUSEDOWN) gotMouseDown = 1; - if (eventType == EMSCRIPTEN_EVENT_DBLCLICK) gotDblClick = 1; - } + if (eventType == EMSCRIPTEN_EVENT_CLICK) gotClick = 1; + if (eventType == EMSCRIPTEN_EVENT_MOUSEDOWN && e->buttons != 0) gotMouseDown = 1; + if (eventType == EMSCRIPTEN_EVENT_DBLCLICK) gotDblClick = 1; if (eventType == EMSCRIPTEN_EVENT_MOUSEUP) gotMouseUp = 1; if (eventType == EMSCRIPTEN_EVENT_MOUSEMOVE && (e->movementX != 0 || e->movementY != 0)) gotMouseMove = 1; } @@ -107,6 +104,8 @@ EM_BOOL wheel_callback(int eventType, const EmscriptenWheelEvent *e, void *userD int main() { + emscripten_set_canvas_size(400, 300); + EMSCRIPTEN_RESULT ret = emscripten_set_click_callback(0, 0, 1, mouse_callback); TEST_RESULT(emscripten_set_click_callback); ret = emscripten_set_mousedown_callback(0, 0, 1, mouse_callback); From 81e672d6967d6460878acc83924db0146c62a4a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Fri, 12 Feb 2016 01:13:54 +0200 Subject: [PATCH 17/49] Make _mm_prefetch() an inline function to consume its arguments. Closes #4087. 
--- system/include/emscripten/xmmintrin.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/system/include/emscripten/xmmintrin.h b/system/include/emscripten/xmmintrin.h index 52641e58a84b9..6266dea13bd97 100644 --- a/system/include/emscripten/xmmintrin.h +++ b/system/include/emscripten/xmmintrin.h @@ -125,7 +125,12 @@ _mm_store_ps(float *__p, __m128 __a) #define _MM_HINT_T2 1 #define _MM_HINT_NTA 0 // No prefetch available, dummy it out. -#define _mm_prefetch(a, sel) ((void)0) +static __inline__ void __attribute__((__always_inline__)) +_mm_prefetch(void *__p, int __i) +{ + ((void)__p); + ((void)__i); +} static __inline__ void __attribute__((__always_inline__)) _mm_sfence(void) From 98b4cf42ee388bcd048be088af425a7cb24d5b36 Mon Sep 17 00:00:00 2001 From: Arnab Choudhury Date: Tue, 5 Jan 2016 09:58:20 -0800 Subject: [PATCH 18/49] Duplicate function elimination This change adds support for duplicate function elimination (DFE) to the JavaScript optimizer. A new JS file has been added - eliminate-duplicate-functions.js - which is used to postprocess the output generated by Emscripten. We add a new file, rather than augmenting the existing JS optimizer file, for a variety of reasons - pass independence, reduced coupling between Python scripts and the JS optimizer, etc. We introduce a multipass algorithm in which each pass consists of the following four phases: Phase 1 - identify duplicate functions using a hash of the function body Phase 2 - identify variable names that would conflict after renaming function calls Phase 3 - generate mapping from equivalent functions to their replacement function - use the information from Phase 2 to ensure that the replacement function is not a variable name Phase 4 - use the mapping generated in Phase 3 to perform the reduction NOTE: In some rare cases, we may actually not be able to move on from Phase 3 if we find that we cannot generate a mapping because of conflicts with variable names. 
One pass can reveal new sets of identical functions which in turn can be reduced by further passes. Empirically, four or five passes are sufficient to eliminate all duplicate functions. Internally, therefore, the elimination will perform 5 passes by default. This can be overridden by setting ELIMINATE_DUPLICATE_FUNCTIONS_PASSES to 1 in settings.js or on the Emscripten command line. Generated asm.js is broken into several batches (at function boundaries) to enable parallelization of the elimination. This saves on memory and makes use of more CPU cores to save on build time. A number of tests have been introduced to test this functionality as well. The change also introduces various tweaks to the amount of diagnostic information that is dumped out by the JavaScript optimizer. Verbose logging is now only enabled in debug mode (via the EMCC_LOG_DEBUG environment variable). We also dump backtraces on encountering unhandled exceptions: this is useful when Emscripten runs as part of a large build process. In order to view detailed information about which functions were merged, set the ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS value to 1 in settings.js or via the Emscripten command line. This generates a log file in the same directory as the generated JavaScript listing the sets of merged functions. This can be decoded using the symbol map generated by Emscripten. It is, therefore, recommended that developers enable symbol map generation when attempting to modify or debug this feature. Since DFE increases build time significantly, it is disabled by default. It can be enabled by setting ELIMINATE_DUPLICATE_FUNCTIONS to 1 either in settings.js or by adding "-s ELIMINATE_DUPLICATE_FUNCTIONS=1" on the Emscripten command line. The poppler test has been updated to also run with the ELIMINATE_DUPLICATE_FUNCTIONS setting set to 1. 
Improvements/future work It has been observed that on average we experience a code size reduction of 25% when transpiling large C++ code bases. Typically, C++ code that makes heavy use of templates will experience the greatest reduction in code size. There are several directions that future work might take: * Deduplication of code across templates: e.g. reduction of std::vector and std::vector to single instantiations of template code when appropriate * Histogram-based selection of candidates for replacement: improved code size should be attainable by assigning the shortest identifiers to the most frequently referenced functions (in the style of Huffman coding) * Convergence: the five-pass default chosen in this implementation is based on empirical observations on a 150,000LOC C++ code base * Candidate selection: this will, most likely, influence both the convergence time (i.e. number of passes) and the code size reduction; currently, when selecting candidates, we choose the shortest identifier from the list that is not also a variable name --- AUTHORS | 4 +- emcc.py | 5 + src/settings.js | 5 + ...iminator-double-parsed-correctly-output.js | 14 + ...tion-eliminator-double-parsed-correctly.js | 22 + ...n-eliminator-replace-array-value-output.js | 25 + ...ator-replace-array-value-with-hash-info.js | 32 ++ ...function-eliminator-replace-array-value.js | 23 + ...ace-function-call-output-with-hash-info.js | 22 + ...eliminator-replace-function-call-output.js | 14 + ...replace-function-call-two-passes-output.js | 10 + ...or-replace-function-call-with-hash-info.js | 27 + ...nction-eliminator-replace-function-call.js | 20 + ...-replace-object-value-assignment-output.js | 18 + ...-object-value-assignment-with-hash-info.js | 34 ++ ...minator-replace-object-value-assignment.js | 24 + ...liminator-replace-variable-value-output.js | 16 + ...r-replace-variable-value-with-hash-info.js | 32 ++ ...ction-eliminator-replace-variable-value.js | 22 + 
.../test-function-eliminator-simple-output.js | 6 + ...nction-eliminator-simple-with-hash-info.js | 15 + .../test-function-eliminator-simple.js | 12 + ...nction-eliminator-variable-clash-output.js | 18 + ...liminator-variable-clash-with-hash-info.js | 31 + ...test-function-eliminator-variable-clash.js | 21 + tests/parallel_test_core.py | 11 +- tests/runner.py | 27 +- tests/test_core.py | 72 ++- tests/test_other.py | 127 ++++ tools/duplicate_function_eliminator.py | 378 ++++++++++++ tools/eliminate-duplicate-functions.js | 541 ++++++++++++++++++ tools/shared.py | 5 + 32 files changed, 1603 insertions(+), 30 deletions(-) create mode 100644 tests/optimizer/test-function-eliminator-double-parsed-correctly-output.js create mode 100644 tests/optimizer/test-function-eliminator-double-parsed-correctly.js create mode 100644 tests/optimizer/test-function-eliminator-replace-array-value-output.js create mode 100644 tests/optimizer/test-function-eliminator-replace-array-value-with-hash-info.js create mode 100644 tests/optimizer/test-function-eliminator-replace-array-value.js create mode 100644 tests/optimizer/test-function-eliminator-replace-function-call-output-with-hash-info.js create mode 100644 tests/optimizer/test-function-eliminator-replace-function-call-output.js create mode 100644 tests/optimizer/test-function-eliminator-replace-function-call-two-passes-output.js create mode 100644 tests/optimizer/test-function-eliminator-replace-function-call-with-hash-info.js create mode 100644 tests/optimizer/test-function-eliminator-replace-function-call.js create mode 100644 tests/optimizer/test-function-eliminator-replace-object-value-assignment-output.js create mode 100644 tests/optimizer/test-function-eliminator-replace-object-value-assignment-with-hash-info.js create mode 100644 tests/optimizer/test-function-eliminator-replace-object-value-assignment.js create mode 100644 tests/optimizer/test-function-eliminator-replace-variable-value-output.js create mode 100644 
tests/optimizer/test-function-eliminator-replace-variable-value-with-hash-info.js create mode 100644 tests/optimizer/test-function-eliminator-replace-variable-value.js create mode 100644 tests/optimizer/test-function-eliminator-simple-output.js create mode 100644 tests/optimizer/test-function-eliminator-simple-with-hash-info.js create mode 100644 tests/optimizer/test-function-eliminator-simple.js create mode 100644 tests/optimizer/test-function-eliminator-variable-clash-output.js create mode 100644 tests/optimizer/test-function-eliminator-variable-clash-with-hash-info.js create mode 100644 tests/optimizer/test-function-eliminator-variable-clash.js create mode 100644 tools/duplicate_function_eliminator.py create mode 100644 tools/eliminate-duplicate-functions.js diff --git a/AUTHORS b/AUTHORS index 3e5e0750950cb..045a740dacb2b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -232,4 +232,6 @@ a license to everyone to use it as detailed in LICENSE.) * Nick Shin * Gregg Tavares * Tanner Rogalsky - +* Richard Cook (copyright owned by Tableau Software, Inc.) +* Arnab Choudhury (copyright owned by Tableau Software, Inc.) +* Charles Vaughn (copyright owned by Tableau Software, Inc.) diff --git a/emcc.py b/emcc.py index 1912e2b5da80e..3494343704799 100755 --- a/emcc.py +++ b/emcc.py @@ -1634,6 +1634,11 @@ def do_minify(): # minifies the code. 
this is also when we do certain optimizati else: JSOptimizer.queue += ['registerize'] + # NOTE: Important that this comes after registerize/registerizeHarder + if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS and opt_level >= 2: + JSOptimizer.flush() + shared.Building.eliminate_duplicate_funcs(final) + if not shared.Settings.EMTERPRETIFY: do_minify() diff --git a/src/settings.js b/src/settings.js index 674d6f226f0d6..2dc056183eafb 100644 --- a/src/settings.js +++ b/src/settings.js @@ -681,4 +681,9 @@ var PTHREADS_PROFILING = 0; // True when building with --threadprofiler var MAX_GLOBAL_ALIGN = -1; // received from the backend +// Duplicate function elimination +var ELIMINATE_DUPLICATE_FUNCTIONS = 0; // disabled by default +var ELIMINATE_DUPLICATE_FUNCTIONS_PASSES = 5; +var ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS = 0; + // Reserved: variables containing POINTER_MASKING. diff --git a/tests/optimizer/test-function-eliminator-double-parsed-correctly-output.js b/tests/optimizer/test-function-eliminator-double-parsed-correctly-output.js new file mode 100644 index 0000000000000..adab68feb486a --- /dev/null +++ b/tests/optimizer/test-function-eliminator-double-parsed-correctly-output.js @@ -0,0 +1,14 @@ +// EMSCRIPTEN_START_ASM +var asm = (function(global, env, buffer) { + "use asm"; + var e = 0; + +// EMSCRIPTEN_START_FUNCS +function a() { + var c = 0.0; + return 0; +} +// EMSCRIPTEN_END_FUNCS + var f = 0; +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); +// EMSCRIPTEN_END_ASM diff --git a/tests/optimizer/test-function-eliminator-double-parsed-correctly.js b/tests/optimizer/test-function-eliminator-double-parsed-correctly.js new file mode 100644 index 0000000000000..3ebb47215ab05 --- /dev/null +++ b/tests/optimizer/test-function-eliminator-double-parsed-correctly.js @@ -0,0 +1,22 @@ +// EMSCRIPTEN_START_ASM +var asm = (function(global, env, buffer) { + "use asm"; + var e = 0; + +// EMSCRIPTEN_START_FUNCS + function a() { + var c = +0; + return 0; + 
} + function b() { + var c = +0; + return 0; + } +// EMSCRIPTEN_END_FUNCS + var f = 0; +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); +// EMSCRIPTEN_END_ASM +// EMSCRIPTEN_GENERATED_FUNCTIONS + + + diff --git a/tests/optimizer/test-function-eliminator-replace-array-value-output.js b/tests/optimizer/test-function-eliminator-replace-array-value-output.js new file mode 100644 index 0000000000000..f320d4589f958 --- /dev/null +++ b/tests/optimizer/test-function-eliminator-replace-array-value-output.js @@ -0,0 +1,25 @@ +// EMSCRIPTEN_START_ASM +var asm = (function(global, env, buffer) { + "use asm"; + +// EMSCRIPTEN_START_FUNCS +function d() { + a(); + e(); + return; +} + +function c() { + a(); + return; +} + +function a() { + return 0; +} + +// EMSCRIPTEN_END_FUNCS + + var f = [ a ]; +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); +// EMSCRIPTEN_END_ASM diff --git a/tests/optimizer/test-function-eliminator-replace-array-value-with-hash-info.js b/tests/optimizer/test-function-eliminator-replace-array-value-with-hash-info.js new file mode 100644 index 0000000000000..56946b51da1e8 --- /dev/null +++ b/tests/optimizer/test-function-eliminator-replace-array-value-with-hash-info.js @@ -0,0 +1,32 @@ +var asm = (function(global, env, buffer) { +"use asm"; +function a() +{ +return 0; +} + +function b() +{ +return 0; +} + +function c() +{ + a(); + return; +} + +function d() +{ + b(); + + // We expect that b gets replaced by a below + var f = [b]; + e(); + + return; +} + +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); + +// {"b":"a"} diff --git a/tests/optimizer/test-function-eliminator-replace-array-value.js b/tests/optimizer/test-function-eliminator-replace-array-value.js new file mode 100644 index 0000000000000..d10d3cf3cd0b4 --- /dev/null +++ b/tests/optimizer/test-function-eliminator-replace-array-value.js @@ -0,0 +1,23 @@ +// EMSCRIPTEN_START_ASM +var asm = (function(global, env, buffer) { + "use asm"; +// EMSCRIPTEN_START_FUNCS + function a() { + 
return 0; + } + function b() { + return 0; + } + function c() { + a(); + return; + } + function d() { + b(); + e(); + return; + } +// EMSCRIPTEN_END_FUNCS + var f = [ b ]; +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); +// EMSCRIPTEN_END_ASM \ No newline at end of file diff --git a/tests/optimizer/test-function-eliminator-replace-function-call-output-with-hash-info.js b/tests/optimizer/test-function-eliminator-replace-function-call-output-with-hash-info.js new file mode 100644 index 0000000000000..381dbe37a8356 --- /dev/null +++ b/tests/optimizer/test-function-eliminator-replace-function-call-output-with-hash-info.js @@ -0,0 +1,22 @@ +var asm = (function(global, env, buffer) { +"use asm"; +function a() +{ +return 0; +} + +function c() +{ + a(); + return; +} + +function d() +{ + a(); + return; +} + +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); + +// {"d":"c"} diff --git a/tests/optimizer/test-function-eliminator-replace-function-call-output.js b/tests/optimizer/test-function-eliminator-replace-function-call-output.js new file mode 100644 index 0000000000000..2ddfffb20732d --- /dev/null +++ b/tests/optimizer/test-function-eliminator-replace-function-call-output.js @@ -0,0 +1,14 @@ +var asm = (function(global, env, buffer) { + "use asm"; + function a() { + return 0; + } + function c() { + a(); + return; + } + function d() { + a(); + return; + } +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); diff --git a/tests/optimizer/test-function-eliminator-replace-function-call-two-passes-output.js b/tests/optimizer/test-function-eliminator-replace-function-call-two-passes-output.js new file mode 100644 index 0000000000000..1d92f637aff9f --- /dev/null +++ b/tests/optimizer/test-function-eliminator-replace-function-call-two-passes-output.js @@ -0,0 +1,10 @@ +var asm = (function(global, env, buffer) { + "use asm"; + function a() { + return 0; + } + function c() { + a(); + return; + } +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); diff --git 
a/tests/optimizer/test-function-eliminator-replace-function-call-with-hash-info.js b/tests/optimizer/test-function-eliminator-replace-function-call-with-hash-info.js new file mode 100644 index 0000000000000..4d120095a97fe --- /dev/null +++ b/tests/optimizer/test-function-eliminator-replace-function-call-with-hash-info.js @@ -0,0 +1,27 @@ +var asm = (function(global, env, buffer) { +"use asm"; +function a() +{ +return 0; +} + +function b() +{ +return 0; +} + +function c() +{ + a(); + return; +} + +function d() +{ + b(); + return; +} + +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); + +// {"b":"a"} diff --git a/tests/optimizer/test-function-eliminator-replace-function-call.js b/tests/optimizer/test-function-eliminator-replace-function-call.js new file mode 100644 index 0000000000000..894ed56f87e32 --- /dev/null +++ b/tests/optimizer/test-function-eliminator-replace-function-call.js @@ -0,0 +1,20 @@ +var asm = (function(global, env, buffer) { + "use asm"; + function a() { + return 0; + } + function b() { + return 0; + } + function c() { + a(); + return; + } + function d() { + b(); + return; + } +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); + + + diff --git a/tests/optimizer/test-function-eliminator-replace-object-value-assignment-output.js b/tests/optimizer/test-function-eliminator-replace-object-value-assignment-output.js new file mode 100644 index 0000000000000..da4ef7be78d80 --- /dev/null +++ b/tests/optimizer/test-function-eliminator-replace-object-value-assignment-output.js @@ -0,0 +1,18 @@ +var asm = (function(global, env, buffer) { + "use asm"; + function a() { + return 0; + } + function c() { + a(); + return; + } + function d() { + a(); + var f = { + g: a + }; + e(); + return; + } +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); diff --git a/tests/optimizer/test-function-eliminator-replace-object-value-assignment-with-hash-info.js b/tests/optimizer/test-function-eliminator-replace-object-value-assignment-with-hash-info.js new file mode 
100644 index 0000000000000..1d9147415e286 --- /dev/null +++ b/tests/optimizer/test-function-eliminator-replace-object-value-assignment-with-hash-info.js @@ -0,0 +1,34 @@ +var asm = (function(global, env, buffer) { +"use asm"; +function a() +{ +return 0; +} + +function b() +{ +return 0; +} + +function c() +{ + a(); + return; +} + +function d() +{ + b(); + + // We expect that b gets replaced by a below + var f = { + g: b + }; + e(); + + return; +} + +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); + +// {"b":"a"} diff --git a/tests/optimizer/test-function-eliminator-replace-object-value-assignment.js b/tests/optimizer/test-function-eliminator-replace-object-value-assignment.js new file mode 100644 index 0000000000000..6bd51dd9696f5 --- /dev/null +++ b/tests/optimizer/test-function-eliminator-replace-object-value-assignment.js @@ -0,0 +1,24 @@ +var asm = (function(global, env, buffer) { + "use asm"; + function a() { + return 0; + } + function b() { + return 0; + } + function c() { + a(); + return; + } + function d() { + b(); + var f = { + g: b + }; + e(); + return; + } +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); + + + diff --git a/tests/optimizer/test-function-eliminator-replace-variable-value-output.js b/tests/optimizer/test-function-eliminator-replace-variable-value-output.js new file mode 100644 index 0000000000000..de2ae5a17c94c --- /dev/null +++ b/tests/optimizer/test-function-eliminator-replace-variable-value-output.js @@ -0,0 +1,16 @@ +var asm = (function(global, env, buffer) { + "use asm"; + function a() { + return 0; + } + function c() { + a(); + return; + } + function d() { + a(); + var e = a; + e(); + return; + } +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); diff --git a/tests/optimizer/test-function-eliminator-replace-variable-value-with-hash-info.js b/tests/optimizer/test-function-eliminator-replace-variable-value-with-hash-info.js new file mode 100644 index 0000000000000..34f0292118df7 --- /dev/null +++ 
b/tests/optimizer/test-function-eliminator-replace-variable-value-with-hash-info.js @@ -0,0 +1,32 @@ +var asm = (function(global, env, buffer) { +"use asm"; +function a() +{ +return 0; +} + +function b() +{ +return 0; +} + +function c() +{ + a(); + return; +} + +function d() +{ + b(); + + // We expect that b gets replaced by a below + var e = b; + e(); + + return; +} + +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); + +// {"b" : "a"} diff --git a/tests/optimizer/test-function-eliminator-replace-variable-value.js b/tests/optimizer/test-function-eliminator-replace-variable-value.js new file mode 100644 index 0000000000000..5eb6664124597 --- /dev/null +++ b/tests/optimizer/test-function-eliminator-replace-variable-value.js @@ -0,0 +1,22 @@ +var asm = (function(global, env, buffer) { + "use asm"; + function a() { + return 0; + } + function b() { + return 0; + } + function c() { + a(); + return; + } + function d() { + b(); + var e = b; + e(); + return; + } +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); + + + diff --git a/tests/optimizer/test-function-eliminator-simple-output.js b/tests/optimizer/test-function-eliminator-simple-output.js new file mode 100644 index 0000000000000..2fa7c4ecace97 --- /dev/null +++ b/tests/optimizer/test-function-eliminator-simple-output.js @@ -0,0 +1,6 @@ +var asm = (function(global, env, buffer) { + "use asm"; + function a() { + return 0; + } +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); diff --git a/tests/optimizer/test-function-eliminator-simple-with-hash-info.js b/tests/optimizer/test-function-eliminator-simple-with-hash-info.js new file mode 100644 index 0000000000000..04a8576894913 --- /dev/null +++ b/tests/optimizer/test-function-eliminator-simple-with-hash-info.js @@ -0,0 +1,15 @@ +var asm = (function(global, env, buffer) { +"use asm"; +function a() +{ +return 0; +} + +function b() +{ +return 0; +} + +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); + +// {"b":"a"} diff --git 
a/tests/optimizer/test-function-eliminator-simple.js b/tests/optimizer/test-function-eliminator-simple.js new file mode 100644 index 0000000000000..803047a6cf25b --- /dev/null +++ b/tests/optimizer/test-function-eliminator-simple.js @@ -0,0 +1,12 @@ +var asm = (function(global, env, buffer) { + "use asm"; + function a() { + return 0; + } + function b() { + return 0; + } +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); + + + diff --git a/tests/optimizer/test-function-eliminator-variable-clash-output.js b/tests/optimizer/test-function-eliminator-variable-clash-output.js new file mode 100644 index 0000000000000..55c8ef83298dc --- /dev/null +++ b/tests/optimizer/test-function-eliminator-variable-clash-output.js @@ -0,0 +1,18 @@ +var asm = (function(global, env, buffer) { + "use asm"; + function a() { + return 0; + } + function b() { + return 0; + } + function c() { + a(); + return; + } + function d() { + var a = 0; + b(); + return; + } +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); diff --git a/tests/optimizer/test-function-eliminator-variable-clash-with-hash-info.js b/tests/optimizer/test-function-eliminator-variable-clash-with-hash-info.js new file mode 100644 index 0000000000000..f97be66b5da1f --- /dev/null +++ b/tests/optimizer/test-function-eliminator-variable-clash-with-hash-info.js @@ -0,0 +1,31 @@ +var asm = (function(global, env, buffer) { +"use asm"; +function a() +{ +return 0; +} + +function b() +{ +return 0; +} + +function c() +{ + a(); + return; +} + +function d() +{ + // Because a is used both as a variable and a function, we will + // not use a as a candidate for replacement, nor will we replace + // calls to b with a. 
+ var a = 0; + b(); + return; +} + +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); + +// {} diff --git a/tests/optimizer/test-function-eliminator-variable-clash.js b/tests/optimizer/test-function-eliminator-variable-clash.js new file mode 100644 index 0000000000000..c0f5c17143c45 --- /dev/null +++ b/tests/optimizer/test-function-eliminator-variable-clash.js @@ -0,0 +1,21 @@ +var asm = (function(global, env, buffer) { + "use asm"; + function a() { + return 0; + } + function b() { + return 0; + } + function c() { + a(); + return; + } + function d() { + var a = 0; + b(); + return; + } +})(Module.asmGlobalArg, Module.asmLibraryArg, buffer); + + + diff --git a/tests/parallel_test_core.py b/tests/parallel_test_core.py index 56520e00017d7..4e06f931d88a8 100755 --- a/tests/parallel_test_core.py +++ b/tests/parallel_test_core.py @@ -14,7 +14,16 @@ assert not os.environ.get('EM_SAVE_DIR'), 'Need separate directories to avoid the parallel tests clashing' # run slower ones first, to optimize total time -optimal_order = ['asm2i', 'asm2nn', 'asm3', 'asm2', 'asm2g', 'asm2f', 'asm1', 'default'] +optimal_order = [ + 'asm2i', + 'asm2nn', + 'asm3', + 'asm2', + 'asm2g', + 'asm2f', + 'asm1', + 'default' +] assert set(optimal_order) == set(test_modes), 'need to update the list of slowest modes' # set up a background thread to report progress diff --git a/tests/runner.py b/tests/runner.py index 03e1ee43a3b03..7d676c2c0e9ac 100755 --- a/tests/runner.py +++ b/tests/runner.py @@ -96,7 +96,16 @@ def path_from_root(*pathelems): # Core test runner class, shared between normal tests and benchmarks checked_sanity = False -test_modes = ['default', 'asm1', 'asm2', 'asm3', 'asm2f', 'asm2g', 'asm2i', 'asm2nn'] +test_modes = [ + 'default', + 'asm1', + 'asm2', + 'asm3', + 'asm2f', + 'asm2g', + 'asm2i', + 'asm2nn' +] test_index = 0 use_all_engines = os.environ.get('EM_ALL_ENGINES') # generally js engines are equivalent, testing 1 is enough. 
set this @@ -360,6 +369,22 @@ def get_func(self, src, name): t += 1 assert t < len(src) + def count_funcs(self, javascript_file): + num_funcs = 0 + start_tok = "// EMSCRIPTEN_START_FUNCS" + end_tok = "// EMSCRIPTEN_END_FUNCS" + start_off = 0 + end_off = 0 + + with open (javascript_file, 'rt') as fin: + blob = "".join(fin.readlines()) + start_off = blob.find(start_tok) + len(start_tok) + end_off = blob.find(end_tok) + asm_chunk = blob[start_off:end_off] + num_funcs = asm_chunk.count('function ') + + return num_funcs + def run_generated_code(self, engine, filename, args=[], check_timeout=True, output_nicerizer=None, assert_returncode=0): stdout = os.path.join(self.get_dir(), 'stdout') # use files, as PIPE can get too full and hang us stderr = os.path.join(self.get_dir(), 'stderr') diff --git a/tests/test_core.py b/tests/test_core.py index e2aaf60dd2eb4..8ce7d505b8eda 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -6164,21 +6164,23 @@ def test(): def test_poppler(self): if WINDOWS: return self.skip('test_poppler depends on freetype, which uses a ./configure script to build and therefore currently only runs on Linux and OS X.') - Settings.NO_EXIT_RUNTIME = 1 - Building.COMPILER_TEST_OPTS += [ - '-I' + path_from_root('tests', 'freetype', 'include'), - '-I' + path_from_root('tests', 'poppler', 'include') - ] + def test(): + Settings.NO_EXIT_RUNTIME = 1 - Settings.INVOKE_RUN = 0 # We append code that does run() ourselves + Building.COMPILER_TEST_OPTS += [ + '-I' + path_from_root('tests', 'freetype', 'include'), + '-I' + path_from_root('tests', 'poppler', 'include') + ] - # See post(), below - input_file = open(os.path.join(self.get_dir(), 'paper.pdf.js'), 'w') - input_file.write(str(map(ord, open(path_from_root('tests', 'poppler', 'paper.pdf'), 'rb').read()))) - input_file.close() + Settings.INVOKE_RUN = 0 # We append code that does run() ourselves - post = ''' + # See post(), below + input_file = open(os.path.join(self.get_dir(), 'paper.pdf.js'), 'w') + 
input_file.write(str(map(ord, open(path_from_root('tests', 'poppler', 'paper.pdf'), 'rb').read()))) + input_file.close() + + post = ''' def process(filename): # To avoid loading this large file to memory and altering it, we simply append to the end src = open(filename, 'a') @@ -6191,28 +6193,44 @@ def process(filename): ) src.close() ''' + + #fontconfig = self.get_library('fontconfig', [os.path.join('src', '.libs', 'libfontconfig.a')]) # Used in file, but not needed, mostly - #fontconfig = self.get_library('fontconfig', [os.path.join('src', '.libs', 'libfontconfig.a')]) # Used in file, but not needed, mostly + freetype = self.get_freetype() - freetype = self.get_freetype() + poppler = self.get_library('poppler', + [os.path.join('utils', 'pdftoppm.o'), + os.path.join('utils', 'parseargs.o'), + os.path.join('poppler', '.libs', 'libpoppler.a')], + env_init={ 'FONTCONFIG_CFLAGS': ' ', 'FONTCONFIG_LIBS': ' ' }, + configure_args=['--disable-libjpeg', '--disable-libpng', '--disable-poppler-qt', '--disable-poppler-qt4', '--disable-cms', '--disable-cairo-output', '--disable-abiword-output', '--enable-shared=no']) - poppler = self.get_library('poppler', - [os.path.join('utils', 'pdftoppm.o'), - os.path.join('utils', 'parseargs.o'), - os.path.join('poppler', '.libs', 'libpoppler.a')], - env_init={ 'FONTCONFIG_CFLAGS': ' ', 'FONTCONFIG_LIBS': ' ' }, - configure_args=['--disable-libjpeg', '--disable-libpng', '--disable-poppler-qt', '--disable-poppler-qt4', '--disable-cms', '--disable-cairo-output', '--disable-abiword-output', '--enable-shared=no']) + # Combine libraries - # Combine libraries + combined = os.path.join(self.get_dir(), 'poppler-combined.bc') + Building.link(poppler + freetype, combined) - combined = os.path.join(self.get_dir(), 'poppler-combined.bc') - Building.link(poppler + freetype, combined) + self.do_ll_run(combined, + map(ord, open(path_from_root('tests', 'poppler', 'ref.ppm'), 'r').read()).__str__().replace(' ', ''), + args='-scale-to 512 paper.pdf 
filename'.split(' '), + post_build=post) + #, build_ll_hook=self.do_autodebug) - self.do_ll_run(combined, - map(ord, open(path_from_root('tests', 'poppler', 'ref.ppm'), 'r').read()).__str__().replace(' ', ''), - args='-scale-to 512 paper.pdf filename'.split(' '), - post_build=post) - #, build_ll_hook=self.do_autodebug) + test() + num_original_funcs = self.count_funcs('src.cpp.o.js') + + # Run with duplicate function elimination turned on + dfe_supported_opt_levels = ['-O2', '-O3', '-Oz', '-Os'] + + for opt_level in dfe_supported_opt_levels: + if opt_level in self.emcc_args: + print >> sys.stderr, "Testing poppler with ELIMINATE_DUPLICATE_FUNCTIONS set to 1" + Settings.ELIMINATE_DUPLICATE_FUNCTIONS = 1 + test() + + # Make sure that DFE ends up eliminating more than 200 functions + assert(num_original_funcs - self.count_funcs('src.cpp.o.js')) > 200 + break def test_openjpeg(self): Building.COMPILER_TEST_OPTS = filter(lambda x: x != '-g', Building.COMPILER_TEST_OPTS) # remove -g, so we have one test without it by default diff --git a/tests/test_other.py b/tests/test_other.py index 7b46b710ca11c..b8fecfe32c15e 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -6055,3 +6055,130 @@ def test(contents): check_execute([PYTHON, EMCC, 'src.cpp', '-O2']) # optimized, so no assertions self.assertNotContained(WARNING, open('a.out.js').read()) + ############################################################ + # Function eliminator tests + ############################################################ + def normalize_line_endings(self, input): + return input.replace('\r\n', '\n').replace('\n\n', '\n').replace('\n\n', '\n') + + def get_file_contents(self, file): + file_contents = "" + with open(file) as fout: + file_contents = "".join(fout.readlines()) + + file_contents = self.normalize_line_endings(file_contents) + + return file_contents + + def function_eliminator_test_helper(self, input_file, expected_output_file, use_hash_info=False): + input_file = 
path_from_root('tests', 'optimizer', input_file) + expected_output_file = path_from_root('tests', 'optimizer', expected_output_file) + command = [path_from_root('tools', 'eliminate-duplicate-functions.js'), input_file, '--no-minimize-whitespace', '--use-asm-ast'] + + if use_hash_info: + command.append('--use-hash-info') + + output, err = Popen(NODE_JS + command, stdin=PIPE, stderr=PIPE, stdout=PIPE).communicate() + assert err == '', err + expected_output = self.get_file_contents(expected_output_file) + output = self.normalize_line_endings(output) + + self.assertIdentical(expected_output, output) + + def test_function_eliminator_simple(self): + self.function_eliminator_test_helper('test-function-eliminator-simple.js', + 'test-function-eliminator-simple-output.js') + + def test_function_eliminator_replace_function_call(self): + self.function_eliminator_test_helper('test-function-eliminator-replace-function-call.js', + 'test-function-eliminator-replace-function-call-output.js') + + def test_function_eliminator_replace_function_call_two_passes(self): + self.function_eliminator_test_helper('test-function-eliminator-replace-function-call-output.js', + 'test-function-eliminator-replace-function-call-two-passes-output.js') + + def test_function_eliminator_replace_array_value(self): + import tools.duplicate_function_eliminator + output_file = 'output.js' + + try: + tools.shared.safe_copy(path_from_root('tests', 'optimizer', 'test-function-eliminator-replace-array-value.js'), output_file) + + tools.duplicate_function_eliminator.run(output_file) + + output_file_contents = self.get_file_contents(output_file) + + expected_file_contents = self.get_file_contents(path_from_root('tests', 'optimizer', 'test-function-eliminator-replace-array-value-output.js')) + + self.assertIdentical(output_file_contents, expected_file_contents) + finally: + tools.tempfiles.try_delete(output_file) + + def test_function_eliminator_replace_object_value_assignment(self): + 
self.function_eliminator_test_helper('test-function-eliminator-replace-object-value-assignment.js', + 'test-function-eliminator-replace-object-value-assignment-output.js') + + def test_function_eliminator_variable_clash(self): + self.function_eliminator_test_helper('test-function-eliminator-variable-clash.js', + 'test-function-eliminator-variable-clash-output.js') + + def test_function_eliminator_replace_variable_value(self): + self.function_eliminator_test_helper('test-function-eliminator-replace-variable-value.js', + 'test-function-eliminator-replace-variable-value-output.js') + + def test_function_eliminator_double_parsed_correctly(self): + # This is a test that makes sure that when we perform final optimization on + # the JS file, doubles are preserved (and not converted to ints). + import tools.tempfiles + import tools.duplicate_function_eliminator + import tools.js_optimizer + + output_file = 'output.js' + + try: + tools.shared.safe_copy(path_from_root('tests', 'optimizer', 'test-function-eliminator-double-parsed-correctly.js'), output_file) + + # Run duplicate function elimination + tools.duplicate_function_eliminator.run(output_file) + + # Run last opts + tools.shared.safe_copy(tools.js_optimizer.run(output_file, ['last', 'asm']), output_file) + output_file_contents = self.get_file_contents(output_file) + + # Compare + expected_file_contents = self.get_file_contents(path_from_root('tests', 'optimizer', 'test-function-eliminator-double-parsed-correctly-output.js')) + self.assertIdentical(output_file_contents, expected_file_contents) + finally: + tools.tempfiles.try_delete(output_file) + + # Now do the same, but using a pre-generated equivalent function hash info that + # comes in handy for parallel processing + def test_function_eliminator_simple_with_hash_info(self): + self.function_eliminator_test_helper('test-function-eliminator-simple-with-hash-info.js', + 'test-function-eliminator-simple-output.js', + use_hash_info=True) + + def 
test_function_eliminator_replace_function_call_with_hash_info(self): + self.function_eliminator_test_helper('test-function-eliminator-replace-function-call-with-hash-info.js', + 'test-function-eliminator-replace-function-call-output.js', + use_hash_info=True) + + def test_function_eliminator_replace_function_call_two_passes_with_hash_info(self): + self.function_eliminator_test_helper('test-function-eliminator-replace-function-call-output-with-hash-info.js', + 'test-function-eliminator-replace-function-call-two-passes-output.js', + use_hash_info=True) + + def test_function_eliminator_replace_object_value_assignment_with_hash_info(self): + self.function_eliminator_test_helper('test-function-eliminator-replace-object-value-assignment-with-hash-info.js', + 'test-function-eliminator-replace-object-value-assignment-output.js', + use_hash_info=True) + + def test_function_eliminator_variable_clash_with_hash_info(self): + self.function_eliminator_test_helper('test-function-eliminator-variable-clash-with-hash-info.js', + 'test-function-eliminator-variable-clash-output.js', + use_hash_info=True) + + def test_function_eliminator_replace_variable_value_with_hash_info(self): + self.function_eliminator_test_helper('test-function-eliminator-replace-variable-value-with-hash-info.js', + 'test-function-eliminator-replace-variable-value-output.js', + use_hash_info=True) diff --git a/tools/duplicate_function_eliminator.py b/tools/duplicate_function_eliminator.py new file mode 100644 index 0000000000000..1a673136e2cad --- /dev/null +++ b/tools/duplicate_function_eliminator.py @@ -0,0 +1,378 @@ + +import os, sys, subprocess, multiprocessing, re, string, json, shutil, logging, traceback +import shared +from js_optimizer import * + +DUPLICATE_FUNCTION_ELIMINATOR = path_from_root('tools', 'eliminate-duplicate-functions.js') + +def process_shell(js, js_engine, shell, equivalentfn_hash_info=None): + suffix = '.eliminatedupes' + + temp_file = temp_files.get(suffix + '.js').name + f = 
open(temp_file, 'w') + f.write(shell) + f.write('\n') + + f.write(equivalentfn_hash_info) + f.close() + + (output,error) = subprocess.Popen(js_engine + + [DUPLICATE_FUNCTION_ELIMINATOR, temp_file, '--use-hash-info', '--no-minimize-whitespace'], + stdout=subprocess.PIPE,stderr=subprocess.PIPE).communicate() + assert len(output) > 0 + assert len(error) == 0 + + return output + +def run_on_chunk(command): + try: + file_suffix = '.js' + index = command.index(DUPLICATE_FUNCTION_ELIMINATOR) + filename = command[index + 1] + + if '--gen-hash-info' in command: + file_suffix = '.json' + + if os.environ.get('EMCC_SAVE_OPT_TEMP') and os.environ.get('EMCC_SAVE_OPT_TEMP') != '0': + saved = 'save_' + os.path.basename(filename) + while os.path.exists(saved): saved = 'input' + str(int(saved.replace('input', '').replace('.txt', ''))+1) + '.txt' + print >> sys.stderr, 'running DFE command', ' '.join(map(lambda c: c if c != filename else saved, command)) + shutil.copyfile(filename, os.path.join(shared.get_emscripten_temp_dir(), saved)) + + if shared.EM_BUILD_VERBOSE_LEVEL >= 3: print >> sys.stderr, 'run_on_chunk: ' + str(command) + + proc = subprocess.Popen(command, stdout=subprocess.PIPE) + output = proc.communicate()[0] + assert proc.returncode == 0, 'Error in optimizer (return code ' + str(proc.returncode) + '): ' + output + assert len(output) > 0 and not output.startswith('Assertion failed'), 'Error in optimizer: ' + output + filename = temp_files.get(os.path.basename(filename) + '.jo' + file_suffix).name + + # Important to write out in binary mode, because the data we are writing contains Windows line endings '\r\n' because it was PIPED from console. + # Otherwise writing \r\n to ascii mode file will result in Windows amplifying \n to \r\n, generating bad \r\r\n line endings. + f = open(filename, 'wb') + f.write(output) + f.close() + if DEBUG and not shared.WINDOWS: print >> sys.stderr, '.' 
# Skip debug progress indicator on Windows, since it doesn't buffer well with multiple threads printing to console. + return filename + except KeyboardInterrupt: + # avoid throwing keyboard interrupts from a child process + raise Exception() + except (TypeError, ValueError) as e: + formatted_lines = traceback.format_exc().splitlines() + + print >> sys.stderr, ">>>>>>>>>>>>>>>>>" + for formatted_line in formatted_lines: + print >> sys.stderr, formatted_line + print >> sys.stderr, "<<<<<<<<<<<<<<<<<" + + raise + +def dump_equivalent_functions(passed_in_filename, global_data): + # Represents the sets of equivalent functions for the passed in filename + equivalent_fn_info = {} + equivalent_fn_json_file = passed_in_filename + ".equivalent_functions.json" + + # If we are running more than one pass, then we want to merge + # all the hash infos into one + if os.path.isfile(equivalent_fn_json_file): + print >> sys.stderr, "Merging data from current pass for {} into {}".format(passed_in_filename, equivalent_fn_json_file) + with open(equivalent_fn_json_file) as data_file: + equivalent_fn_info = json.load(data_file) + else: + print >> sys.stderr, "Writing equivalent functions for {} to {}".format(passed_in_filename, equivalent_fn_json_file) + + # Merge the global data's fn_hash_to_fn_name structure into + # the equivalent function info hash. + for fn_hash, fn_names in global_data['fn_hash_to_fn_name'].iteritems(): + if fn_hash not in equivalent_fn_info: + # Exclude single item arrays as they are of no use to us. 
+ if len(fn_names) > 1: + equivalent_fn_info[fn_hash] = fn_names[:] + else: + for fn_name in fn_names: + if fn_name not in equivalent_fn_info[fn_hash]: + equivalent_fn_info[fn_hash].append(fn_name) + + with open(equivalent_fn_json_file, 'w') as fout: + fout.write(json.dumps(equivalent_fn_info)) + +def write_equivalent_fn_hash_to_file(f, json_files, passed_in_filename): + # Represents the aggregated info for all the json files passed in + # Each json file contains info for one of the processed chunks + global_data = {} + global_data['fn_hash_to_fn_name'] = {} + global_data['fn_hash_to_fn_body'] = {} + global_data['variable_names'] = {} + + for json_file in json_files: + with open(json_file) as data_file: + data = json.load(data_file) + + # Merge the data's fn_hash_to_fn_name structure into + # the global data hash. + for fn_hash, fn_names in data['fn_hash_to_fn_name'].iteritems(): + if fn_hash not in global_data['fn_hash_to_fn_name']: + global_data['fn_hash_to_fn_name'][fn_hash] = fn_names[:] + global_data['fn_hash_to_fn_body'][fn_hash] = data['fn_hash_to_fn_body'][fn_hash] + else: + assert(data['fn_hash_to_fn_body'][fn_hash] == global_data['fn_hash_to_fn_body'][fn_hash]) + + for fn_name in fn_names: + if fn_name not in global_data['fn_hash_to_fn_name'][fn_hash]: + global_data['fn_hash_to_fn_name'][fn_hash].append(fn_name) + + # Merge the data's variable_names structure into + # the global data hash. 
+ for variable, value in data['variable_names'].iteritems(): + if variable not in global_data['variable_names']: + global_data['variable_names'][variable] = value + + variable_names = global_data['variable_names'] + + # Lets generate the equivalent function hash from the global data set + equivalent_fn_hash = {} + for fn_hash, fn_names in global_data['fn_hash_to_fn_name'].iteritems(): + shortest_fn = None + for fn_name in fn_names: + if (fn_name not in variable_names) and (shortest_fn is None or (len(fn_name) < len(shortest_fn))): + shortest_fn = fn_name + + if shortest_fn is not None: + for fn_name in fn_names: + if fn_name not in variable_names and fn_name != shortest_fn: + equivalent_fn_hash[fn_name] = shortest_fn + + # Dump the sets of equivalent functions if the user desires it + # This comes in handy for debugging + if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS: + dump_equivalent_functions(passed_in_filename, global_data) + + # Now write the equivalent function hash to the last line of the file + f.write('// ' + json.dumps(equivalent_fn_hash, separators=(',',':'))) + +# gen_hash_info is used to determine whether we are generating +# the global set of function implementation hashes. If set to +# False, we assume that we have to use the global hash info to +# reduce the set of duplicate functions +def run_on_js(filename, gen_hash_info=False): + js_engine=shared.NODE_JS + + js = open(filename).read() + if os.linesep != '\n': + js = js.replace(os.linesep, '\n') # we assume \n in the splitting code + + equivalentfn_hash_info = None + passed_in_filename = filename + + # Find markers + start_funcs = js.find(start_funcs_marker) + end_funcs = js.rfind(end_funcs_marker) + + if start_funcs < 0 or end_funcs < start_funcs: + logging.critical('Invalid input file. Did not contain appropriate markers. 
(start_funcs: %s, end_funcs: %s)' % (start_funcs, end_funcs)) + sys.exit(1) + + if not gen_hash_info: + equivalentfn_hash_info = js[js.rfind('//'):] + + start_asm = js.find(start_asm_marker) + end_asm = js.rfind(end_asm_marker) + assert (start_asm >= 0) == (end_asm >= 0) + + # We need to split out the asm shell as well, for minification + pre = js[:start_asm + len(start_asm_marker)] + post = js[end_asm:] + asm_shell = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] + ''' +EMSCRIPTEN_FUNCS(); +''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)] + js = js[start_funcs + len(start_funcs_marker):end_funcs] + + # we assume there is a maximum of one new name per line + asm_shell_pre, asm_shell_post = process_shell(js, js_engine, asm_shell, equivalentfn_hash_info).split('EMSCRIPTEN_FUNCS();'); + asm_shell_post = asm_shell_post.replace('});', '})'); + pre += asm_shell_pre + '\n' + start_funcs_marker + post = end_funcs_marker + asm_shell_post + post + + if not gen_hash_info: + # We don't need the extra info at the end + post = post[:post.rfind('//')].strip() + else: + pre = js[:start_funcs + len(start_funcs_marker)] + post = js[end_funcs + len(end_funcs_marker):] + js = js[start_funcs + len(start_funcs_marker):end_funcs] + post = end_funcs_marker + post + + total_size = len(js) + funcs = split_funcs(js, False) + + js = None + + # if we are making source maps, we want our debug numbering to start from the + # top of the file, so avoid breaking the JS into chunks + cores = int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count()) + + intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE)) + chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks)) + chunks = shared.chunkify(funcs, chunk_size) + + chunks = filter(lambda chunk: len(chunk) > 0, chunks) + if DEBUG and len(chunks) > 0: print >> sys.stderr, 'chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk 
size range:', max(map(len, chunks)), '-', min(map(len, chunks)) + funcs = None + + if len(chunks) > 0: + def write_chunk(chunk, i): + temp_file = temp_files.get('.jsfunc_%d.js' % i).name + f = open(temp_file, 'w') + f.write(chunk) + + if not gen_hash_info: + f.write('\n') + f.write(equivalentfn_hash_info) + f.close() + return temp_file + filenames = [write_chunk(chunks[i], i) for i in range(len(chunks))] + else: + filenames = [] + + old_filenames = filenames[:] + if len(filenames) > 0: + commands = map(lambda filename: js_engine + [DUPLICATE_FUNCTION_ELIMINATOR, filename, '--gen-hash-info' if gen_hash_info else '--use-hash-info', '--no-minimize-whitespace'], filenames) + + if DEBUG and commands is not None: + print >> sys.stderr, [' '.join(command if command is not None else '(null)') for command in commands] + + cores = min(cores, len(filenames)) + if len(chunks) > 1 and cores >= 2: + # We can parallelize + if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size/(1024*1024.)) + pool = multiprocessing.Pool(processes=cores) + filenames = pool.map(run_on_chunk, commands, chunksize=1) + else: + # We can't parallize, but still break into chunks to avoid uglify/node memory issues + if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % (len(chunks)) + filenames = [run_on_chunk(command) for command in commands] + else: + filenames = [] + + json_files = [] + + # We're going to be coalescing the files back at the end + # Just replace the file list with the ones provided in + # the command list - and save off the generated Json + if gen_hash_info: + json_files = filenames[:] + filenames = old_filenames[:] + + for filename in filenames: temp_files.note(filename) + + filename += '.jo.js' + f = open(filename, 'w') + f.write(pre); + pre = None + + # sort functions by size, to make diffing easier and to improve aot times + funcses = [] + for 
out_file in filenames: + funcses.append(split_funcs(open(out_file).read(), False)) + funcs = [item for sublist in funcses for item in sublist] + funcses = None + def sorter(x, y): + diff = len(y[1]) - len(x[1]) + if diff != 0: return diff + if x[0] < y[0]: return 1 + elif x[0] > y[0]: return -1 + return 0 + if not os.environ.get('EMCC_NO_OPT_SORT'): + funcs.sort(sorter) + + for func in funcs: + f.write(func[1]) + funcs = None + + f.write('\n') + f.write(post); + # No need to write suffix: if there was one, it is inside post which exists when suffix is there + f.write('\n') + + if gen_hash_info and len(json_files) > 0: + write_equivalent_fn_hash_to_file(f, json_files, passed_in_filename) + f.close() + + return filename + +def save_temp_file(file_to_process): + if os.environ.get('EMSCRIPTEN_SAVE_TEMP_FILES') and os.environ.get('EMSCRIPTEN_TEMP_FILES_DIR'): + destinationFile = file_to_process + + temp_dir_name = os.environ.get('TEMP_DIR') + destinationFile = destinationFile.replace(temp_dir_name, os.environ.get('EMSCRIPTEN_TEMP_FILES_DIR')) + + if not os.path.exists(os.path.dirname(destinationFile)): + os.makedirs(os.path.dirname(destinationFile)) + + print >> sys.stderr, "Copying {} to {}".format(file_to_process, destinationFile) + shutil.copyfile(file_to_process, destinationFile) + +def get_func_names(javascript_file): + func_names = [] + start_tok = "// EMSCRIPTEN_START_FUNCS" + end_tok = "// EMSCRIPTEN_END_FUNCS" + start_off = 0 + end_off = 0 + + with open (javascript_file, 'rt') as fin: + blob = "".join(fin.readlines()) + start_off = blob.find(start_tok) + len(start_tok) + end_off = blob.find(end_tok) + asm_chunk = blob[start_off:end_off] + + for match in re.finditer('function (\S+?)\s*\(', asm_chunk): + func_names.append(match.groups(1)[0]) + + return func_names + +def eliminate_duplicate_funcs(file_name): + if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS != 0: + # Remove previous log file if it exists + equivalent_fn_json_file = 
file_name + ".equivalent_functions.json" + if os.path.isfile(equivalent_fn_json_file): + print >> sys.stderr, "Deleting old json: " + equivalent_fn_json_file + os.remove(equivalent_fn_json_file) + + old_funcs = get_func_names(file_name) + + for pass_num in range(shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_PASSES): + if DEBUG: print >> sys.stderr, "[PASS {}]: eliminating duplicate functions in: {}.".format(pass_num, file_name) + + # Generate the JSON for the equivalent hash first + processed_file = run_on_js(filename=file_name, gen_hash_info=True) + + save_temp_file(processed_file) + + # Use the hash to reduce the JS file + final_file = run_on_js(filename=processed_file, gen_hash_info=False) + + save_temp_file(final_file) + + shared.safe_move(final_file, file_name) + + if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS != 0: + new_funcs = get_func_names(file_name) + + eliminated_funcs_file = file_name + ".eliminated_functions.json" + print >> sys.stderr, "Writing eliminated functions to file: {}".format(eliminated_funcs_file) + + with open(eliminated_funcs_file, 'w') as fout: + eliminated_functions = list(set(old_funcs)-set(new_funcs)) + eliminated_functions.sort() + for eliminated_function in eliminated_functions: + fout.write('{}\n'.format(eliminated_function)) + +def run(filename, js_engine=shared.NODE_JS): + js_engine = shared.listify(js_engine) + + return temp_files.run_and_clean(lambda: eliminate_duplicate_funcs(filename)) + +if __name__ == '__main__': + out = run(sys.argv[1], sys.argv[2:]) + diff --git a/tools/eliminate-duplicate-functions.js b/tools/eliminate-duplicate-functions.js new file mode 100644 index 0000000000000..ce015d9a4289b --- /dev/null +++ b/tools/eliminate-duplicate-functions.js @@ -0,0 +1,541 @@ +/////////////////////////////////////////////////////////////////////////////////////////////// +// Eliminate-Duplicate-Functions.js +// +// This is a Javascript file that is used to post-process an Emscripten transpiled JS 
file. +// It will remove all the duplicate functions from the generated ASM. In its current form, +// the input JS file is expected to be a 'chunk' from an Emscripten generated ASM.JS file. +// +// An ASM JS chunk consists of a number of ASM.JS function definitions. It can also represent +// the ASM JS 'shell' which consists of the global variable declarations for the generated ASM JS. +// +// The file will remove all the generated functions that are deemed to be identical. Currently, +// the file will only run one pass of the algorithm. The caller of this JS file can run multiple +// passes to ensure that higher level functions which will become identical after a pass can +// be further eliminated. +// +// Usually, 4 or at most 5 passes will result in an optimal reduction - i.e., in a file that +// cannot be reduced any further. +/////////////////////////////////////////////////////////////////////////////////////////////// +var crypto = require('crypto'); +var uglify = require('../tools/eliminator/node_modules/uglify-js'); + +var nodeFS = require('fs'); +var nodePath = require('path'); +var debug = false; +var debugFile = undefined; +var debugFileName = 'function_eliminator.log'; +var genHashInfo = false; +var useHashInfo = false; +var useAsmAst = false; + +// Variables that helps control verbosity of debug spew +// Set appropriate zones here (to 0 or 1) for debugging various +// parts of the algorithm. 
+var ZONE_IDENTIFY_DUPLICATE_FUNCS = 1; +var ZONE_REPLACE_FUNCTION_REFERENCES = 1; +var ZONE_REPLACE_DUPLICATE_FUNCS = 1; +var ZONE_EQUIVALENT_FUNCTION_HASH = 1; +var ZONE_TOP_LEVEL = 1; +var ZONE_DUMP_AST = 0; + +if (!nodeFS.existsSync) { + nodeFS.existsSync = function(path) { + try { + return !!nodeFS.readFileSync(path); + } catch (e) { + return false; + } + } +} + +function srcToAst(src) { + return uglify.parser.parse(src, false, false); +} + +function astToSrc(ast, minifyWhitespace) { + return uglify.uglify.gen_code(ast, { + debug: debug, + ascii_only: true, + beautify: !minifyWhitespace, + indent_level: 1 + }); +} + +// Traverses the children of a node. If the traverse function returns an object, +// replaces the child. If it returns true, stop the traversal and return true. +function traverseChildren(node, traverse, pre, post) { + for (var i = 0; i < node.length; i++) { + var subnode = node[i]; + if (Array.isArray(subnode)) { + var subresult = traverse(subnode, pre, post); + if (subresult === true) return true; + if (subresult !== null && typeof subresult === 'object') node[i] = subresult; + } + } +} + +print = function(x) { + process['stdout'].write(x + '\n'); +}; + +printErr = function(x) { + process['stderr'].write(x + '\n'); +}; + +function debugLog(zone, str) { + if (debug && (zone !== 0)) { + nodeFS.writeSync(debugFile, str + '\n'); + } +} + +// Traverses a JavaScript syntax tree rooted at the given node calling the given +// callback for each node. +// @arg node: The root of the AST. +// @arg pre: The pre to call for each node. This will be called with +// the node as the first argument and its type as the second. If true is +// returned, the traversal is stopped. If an object is returned, +// it replaces the passed node in the tree. If null is returned, we stop +// traversing the subelements (but continue otherwise). +// @arg post: A callback to call after traversing all children. +// @returns: If the root node was replaced, the new root node. 
If the traversal +// was stopped, true. Otherwise undefined. +function traverse(node, pre, post) { + var type = node[0], + result, len; + var relevant = typeof type === 'string'; + if (relevant) { + var result = pre(node, type); + if (result === true) return true; + if (result && result !== null) node = result; // Continue processing on this node + } + if (result !== null) { + if (traverseChildren(node, traverse, pre, post) === true) return true; + } + if (relevant) { + if (post) { + var postResult = post(node, type); + result = result || postResult; + } + } + return result; +} + +function dumpAst(ast) { + debugLog(ZONE_DUMP_AST, JSON.stringify(ast, null, ' ')); +} + +function getFunctionBody(node) { + // Remove the function part of the source for the function + var functionSrc = astToSrc(node, true); + var functionNameRegex = /(function .*?)\(/; + return functionSrc.replace(functionNameRegex, "("); +} + +function traverseFunctions(ast, callback) { + var topLevelList = useAsmAst ? ast : ast[1]; + + for (var listIndex = 0; listIndex < topLevelList.length; ++listIndex) { + var node = topLevelList[listIndex]; + + if (node[0] === 'defun') { + callback(node); + } + } +} + +function identifyDuplicateFunctions(ast) { + debugLog(ZONE_TOP_LEVEL, "identifyDuplicateFunctions"); + + var functionHashToFunctionName = {}; + + traverseFunctions(ast, function(node) { + debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, "Node: " + node); + var functionBody = getFunctionBody(node); + + debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, "Function Body: " + functionBody + "\n"); + var functionHash = crypto.createHash('sha256').update(functionBody).digest('hex'); + + if (functionHashToFunctionName[functionHash] === undefined) { + functionHashToFunctionName[functionHash] = []; + } + + debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, typeof node[1]); + functionHashToFunctionName[functionHash].push(node[1]); + debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, functionHash + '->' + node[1]); + }); + + if (debug) { + for (var key 
in functionHashToFunctionName) { + debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, key + "->" + functionHashToFunctionName[key]); + } + } + + return functionHashToFunctionName; +} + +function getVariableNames(ast) { + var variableNames = {}; + traverse(ast, function(node, type) { + if (type === 'var') { + + var vars = node[1]; + + if (Array.isArray(vars)) { + for (var i = 0; i < vars.length; i++) { + var ident = vars[i][0]; + + variableNames[ident] = 1; + } + } + } + }); + + return variableNames; +} + +function replaceFunctionDefinitions(ast, equivalentFunctionHash) { + debugLog(ZONE_TOP_LEVEL, 'replaceFunctionDefinitions'); + + var topLevelList = useAsmAst ? ast : ast[1]; + var indicesToRemove = []; + for (var listIndex = 0; listIndex < topLevelList.length; ++listIndex) { + var node = topLevelList[listIndex]; + + if (node[0] === 'defun' && equivalentFunctionHash[node[1]] !== undefined) { + indicesToRemove.push(listIndex); + } + } + + if (indicesToRemove.length > 0) { + for (var i = indicesToRemove.length - 1; i >= 0; --i) { + debugLog(ZONE_REPLACE_DUPLICATE_FUNCS, "Removing " + topLevelList[indicesToRemove[i]][1]); + topLevelList.splice(indicesToRemove[i], 1); + } + } +} + +function replaceFunctionReferences(ast, equivalentFunctionHash) { + debugLog(ZONE_TOP_LEVEL, 'replaceFunctionReferences'); + traverse(ast, function(node, type) { + if (type === 'call') { + var functionName = node[1][1]; + + // Replace the call with a call to the equivalent function if there is one + if (equivalentFunctionHash[functionName] !== undefined) { + node[1][1] = equivalentFunctionHash[functionName]; + } + } else if (type === 'var') { + var vars = node[1]; + for (var i = 0; i < vars.length; i++) { + debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, 'Variable: ' + vars[i]); + var value = vars[i][1][1]; + debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, 'Variable value: ' + value); + + if (equivalentFunctionHash[value] !== undefined) { + debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, 'Variable value replacement: 
' + equivalentFunctionHash[value]); + vars[i][1][1] = equivalentFunctionHash[value]; + } + } + } else if (type === 'assign') { + if (node[3][0] === 'name' && equivalentFunctionHash[node[3][1]] !== undefined) { + node[3][1] = equivalentFunctionHash[node[3][1]]; + } + } else if (type === 'object') { + var assignments = node[1]; + + for (var i = 0; i < assignments.length; i++) { + debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, 'Object Value Assignment: ' + assignments[i][1][1]); + + if (equivalentFunctionHash[assignments[i][1][1]] !== undefined) { + assignments[i][1][1] = equivalentFunctionHash[assignments[i][1][1]]; + } + } + } else if (type === 'array') { + var arrayVars = node[1]; + + if (Array.isArray(arrayVars)) { + for (var i = 0; i < arrayVars.length; i++) { + debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, "Array: " + arrayVars[i][0] + ", " + arrayVars[i][1]); + // First element contains type, 2nd contains value + if (arrayVars[i][0] == 'name' && equivalentFunctionHash[arrayVars[i][1]] !== undefined) { + debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, "Replacing array value " + arrayVars[i][1]); + arrayVars[i][1] = equivalentFunctionHash[arrayVars[i][1]]; + } + } + } else { + debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, "ArrayVars (not an array): " + arrayVars + ", node: " + node); + } + } + }); +} + +function replaceDuplicateFuncs(ast, equivalentFunctionHash) { + debugLog(ZONE_TOP_LEVEL, "replaceDuplicateFuncs"); + + // Replace references to all functions with their equivalent function + replaceFunctionReferences(ast, equivalentFunctionHash); + + // Now lets replace the function definitions + replaceFunctionDefinitions(ast, equivalentFunctionHash); +} + +function logEquivalentFunctionHash(equivalentFunctionHash) { + if (debug && ZONE_EQUIVALENT_FUNCTION_HASH != 0) { + debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, "Equivalent Function Hash:"); + for (var fn in equivalentFunctionHash) { + debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, fn + "->" + equivalentFunctionHash[fn]); + } + } +} + 
+function generateEquivalentFunctionHash(functionHashToFunctionName, variableNames) { + var equivalentFunctionHash = {}; + + debugLog(ZONE_TOP_LEVEL, "generateEquivalentFunctionHash"); + + if (debug && ZONE_EQUIVALENT_FUNCTION_HASH != 0) { + debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, "Equivalent Functions:"); + + for (var fnHash in functionHashToFunctionName) { + if (functionHashToFunctionName[fnHash].length > 1) { + debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, JSON.stringify(functionHashToFunctionName[fnHash], null, ' ')); + } + } + } + + for (var fnHash in functionHashToFunctionName) { + var equivalentFunctions = functionHashToFunctionName[fnHash]; + var shortestFunction = undefined; + var equivalentFn = undefined; + + // From each list of equivalent functions, pick the + // shortest one that is not also a variable name + for (var index in equivalentFunctions) { + equivalentFn = equivalentFunctions[index]; + + // If one of the variables is not the same name as the equivalent function, + // and the equivalent function is shorter than the shortest function. 
+ if ((variableNames[equivalentFn] === undefined) && + (shortestFunction === undefined || equivalentFn.length < shortestFunction.length)) { + shortestFunction = equivalentFn; + } + + if (debug && variableNames[equivalentFn] !== undefined) { + debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, equivalentFn + " is a variable"); + } + } + + if (shortestFunction !== undefined) { + // Populate the equivalent function hash with this info + for (var index in equivalentFunctions) { + equivalentFn = equivalentFunctions[index]; + + // If we're not the shortest function, and + // we are not a variable name + if ((equivalentFn !== shortestFunction) && variableNames[equivalentFn] === undefined) { + equivalentFunctionHash[equivalentFn] = shortestFunction; + debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, equivalentFn + "->" + shortestFunction); + } + } + } + } + + return equivalentFunctionHash; +} + +function getBodyForFunction(ast, functionName) { + var functionBody = undefined; + var topLevelList = ast[1]; + + for (var listIndex = 0; listIndex < topLevelList.length; ++listIndex) { + var node = topLevelList[listIndex]; + + if (node[0] === 'defun' && node[1] === functionName) { + functionBody = getFunctionBody(node); + break; + } + } + + return functionBody; +} + +function checkForHashCollisions(ast, functionHashToFunctionName) { + var functionHashToFunctionBody = {}; + + for (var functionHash in functionHashToFunctionName) { + var equivalentFunctions = functionHashToFunctionName[functionHash]; + var functionBody = getBodyForFunction(ast, equivalentFunctions[0]); + + functionHashToFunctionBody[functionHash] = functionBody; + + // If we have more than one equivalent function, make sure + // that the bodies are the same from the hash values + if (equivalentFunctions.length > 1) { + for (var functionIndex = 1; functionIndex < equivalentFunctions.length; ++functionIndex) { + var curFunctionBody = getBodyForFunction(ast, equivalentFunctions[functionIndex]); + + if (curFunctionBody !== functionBody) { + 
printErr("ERROR!!! Function bodies for two hash-equivalent functions differ!!! Candidates: " + + equivalentFunctions[0] + ", " + equivalentFunctions[functionIndex]); + process.exit(1); + } + } + } + } + + return functionHashToFunctionBody; +} + +function eliminateDuplicateFuncs(ast) { + debugLog(ZONE_TOP_LEVEL, "eliminateDuplicateFuncs"); + + // Phase 1 - identify duplicate functions + var functionHashToFunctionName = identifyDuplicateFunctions(ast); + + // Phase 1.1 - Check for hash collisions + checkForHashCollisions(ast, functionHashToFunctionName); + + // Phase 2 - identify variables that conflict with function names + var variableNames = getVariableNames(ast); + + // Phase 3 - generate the equivalent function hash + var equivalentFunctionHash = generateEquivalentFunctionHash(functionHashToFunctionName, variableNames); + + // Phase 4 - for each set of equivalent functions, pick one and + // use it to replace the other equivalent functions. + replaceDuplicateFuncs(ast, equivalentFunctionHash); + + return; +} + +function find(filename) { + var prefixes = [nodePath.join(__dirname, '..', 'src'), process.cwd()]; + for (var i = 0; i < prefixes.length; ++i) { + var combined = nodePath.join(prefixes[i], filename); + if (nodeFS.existsSync(combined)) { + return combined; + } + } + return filename; +} + +function findAsmAst(ast) { + var asmNode = undefined; + traverse(ast, function(node, type) { + if (type === 'var') { + + var vars = node[1]; + for (var i = 0; i < vars.length; i++) { + var ident = vars[i][0]; + + if (ident === 'asm') { + asmNode = vars[i][1][1][3]; // asm->call->toplevel-ast + } + } + } + }); + + return asmNode; +} + +function printHashInfo(ast) { + debugLog(ZONE_TOP_LEVEL, "printHashInfo"); + + var infoHash = {}; + infoHash['variable_names'] = getVariableNames(ast); + infoHash['fn_hash_to_fn_name'] = identifyDuplicateFunctions(ast); + infoHash['fn_hash_to_fn_body'] = checkForHashCollisions(ast, infoHash['fn_hash_to_fn_name']); + + 
print(JSON.stringify(infoHash)); +} + +read = function(filename) { + var absolute = find(filename); + return nodeFS['readFileSync'](absolute).toString(); +}; + +// Main +var arguments_ = process['argv'].slice(2); +var noMinimizeWhitespace = false; // Eliminate whitespace by default +var functionName = undefined; +var src = undefined; + +for (var argIndex = 0; argIndex < arguments_.length; ++argIndex) { + var arg = arguments_[argIndex]; + if (arg === '--debug') { + debug = true; + debugFile = nodeFS.openSync(debugFileName, 'w'); + } else if (arg === '--no-minimize-whitespace') { + noMinimizeWhitespace = true; + } else if (arg === '--gen-hash-info') { + genHashInfo = true; + } else if (arg === '--use-hash-info') { + useHashInfo = true; + } else if (arg === '--use-asm-ast') { + useAsmAst = true; + } else if (arg === '--get-function-body') { + if (argIndex === arguments_.length_ - 1) { + throw new Error('Please specify valid arguments!'); + } + + functionName = arguments_[argIndex+1]; + argIndex += 1; + } else if (/^--/.test(arg)) { + throw new Error('Please specify valid arguments!'); + } else if (src === undefined) { + src = read(arg); + } else { + throw new Error('Please specify valid arguments!'); + } +} + +var ast = srcToAst(src); +var asmAst = ast; + +if (useAsmAst) { + asmAst = findAsmAst(ast); +} + +if (debug) { + dumpAst(ast); +} + +if (functionName !== undefined) { + var functionBody = getBodyForFunction(ast, functionName); + + if (functionBody === undefined) { + throw new Error('Could not find body for function ' + functionName + '!!!'); + } + + print(functionBody); +} else if (genHashInfo) { + printHashInfo(asmAst); +} else { + equivalentFunctionHash = {}; + + if (useHashInfo) { + // The last line has the required info + infoHashJsonStart = src.lastIndexOf("//") + 2 // 2 for going past the // + + if (infoHashJsonStart == -1) { + throw new Error('--use-hash-info specified but no JSON found at the end of the file!'); + } + + equivalentFunctionHash = 
JSON.parse(src.substring(infoHashJsonStart)); + + logEquivalentFunctionHash(equivalentFunctionHash); + replaceDuplicateFuncs(asmAst, equivalentFunctionHash); + } else { + eliminateDuplicateFuncs(asmAst); + } + + var minimizeWhitespace = (debug || noMinimizeWhitespace) ? false : true; + var js = astToSrc(ast, minimizeWhitespace); + + print(js); +} + +if (debug && debugFile !== undefined) { + printErr('Wrote debug log to ' + debugFileName); + nodeFS.close(debugFile); +} \ No newline at end of file diff --git a/tools/shared.py b/tools/shared.py index 06887033233a6..933fe44968016 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -1654,6 +1654,11 @@ def js_optimizer(filename, passes, debug=False, extra_info=None, output_filename ret = output_filename return ret + @staticmethod + def eliminate_duplicate_funcs(filename): + import duplicate_function_eliminator + duplicate_function_eliminator.eliminate_duplicate_funcs(filename) + @staticmethod def closure_compiler(filename, pretty=True): if not check_closure_compiler(): From a54bb4dbcf34d4f17c21baa07f2abf4f5e82ff76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Fri, 12 Feb 2016 12:40:57 +0200 Subject: [PATCH 19/49] Fix test_sse1 --- tests/test_sse1.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sse1.cpp b/tests/test_sse1.cpp index f53f997776403..ec04509e06525 100644 --- a/tests/test_sse1.cpp +++ b/tests/test_sse1.cpp @@ -321,8 +321,8 @@ int main() _MM_SET_ROUNDING_MODE(roundingMode); unsigned int csr = _mm_getcsr(); _mm_setcsr(csr); - unsigned char dummyData[4096]; #endif + unsigned char dummyData[4096]; _mm_prefetch(dummyData, _MM_HINT_T0); _mm_prefetch(dummyData, _MM_HINT_T1); _mm_prefetch(dummyData, _MM_HINT_T2); From 6e586396fdb4cdca8304588b044154dd0cf069de Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 12 Feb 2016 14:37:06 -0800 Subject: [PATCH 20/49] dfe comment --- src/settings.js | 7 ++++++- tests/test_core.py | 2 +- 2 files changed, 7 insertions(+), 
2 deletions(-) diff --git a/src/settings.js b/src/settings.js index 2dc056183eafb..7d21a993a6b03 100644 --- a/src/settings.js +++ b/src/settings.js @@ -681,7 +681,12 @@ var PTHREADS_PROFILING = 0; // True when building with --threadprofiler var MAX_GLOBAL_ALIGN = -1; // received from the backend -// Duplicate function elimination +// Duplicate function elimination. This coalesces function bodies that are +// identical, which can happen e.g. if two methods have different C/C++ +// or LLVM types, but end up identical at the asm.js level (all pointers +// are the same as int32_t in asm.js, for example). +// This option is quite slow to run, as it processes and hashes all methods +// in the codebase in multiple passes. var ELIMINATE_DUPLICATE_FUNCTIONS = 0; // disabled by default var ELIMINATE_DUPLICATE_FUNCTIONS_PASSES = 5; var ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS = 0; diff --git a/tests/test_core.py b/tests/test_core.py index 8ce7d505b8eda..d3d5a5296cf99 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -6229,7 +6229,7 @@ def process(filename): test() # Make sure that DFE ends up eliminating more than 200 functions - assert(num_original_funcs - self.count_funcs('src.cpp.o.js')) > 200 + assert (num_original_funcs - self.count_funcs('src.cpp.o.js')) > 200 break def test_openjpeg(self): From 92236a82df6bf2a1b3ba1c4794a603592bdde9b4 Mon Sep 17 00:00:00 2001 From: Pierre Krieger Date: Sat, 13 Feb 2016 20:55:15 +0100 Subject: [PATCH 21/49] Add the _Exit symbol --- AUTHORS | 1 + src/library.js | 4 ++++ tests/test_core.py | 10 ++++++++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/AUTHORS b/AUTHORS index 045a740dacb2b..424e0eff9661e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -235,3 +235,4 @@ a license to everyone to use it as detailed in LICENSE.) * Richard Cook (copyright owned by Tableau Software, Inc.) * Arnab Choudhury (copyright owned by Tableau Software, Inc.) * Charles Vaughn (copyright owned by Tableau Software, Inc.) 
+* Pierre Krieger diff --git a/src/library.js b/src/library.js index d7cdd540158a6..af0a07cc6a7ca 100644 --- a/src/library.js +++ b/src/library.js @@ -494,6 +494,10 @@ LibraryManager.library = { exit: function(status) { __exit(status); }, + _Exit__deps: ['exit'], + _Exit: function(status) { + __exit(status); + }, _ZSt9terminatev__deps: ['exit'], _ZSt9terminatev: function() { diff --git a/tests/test_core.py b/tests/test_core.py index d3d5a5296cf99..8877ccae5c2ec 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -7552,7 +7552,12 @@ def test_exit_status(self): int main() { atexit(cleanup); // this atexit should still be called printf("hello, world!\n"); - exit(118); // Unusual exit status to make sure it's working! + // Unusual exit status to make sure it's working! + if (CAPITAL_EXIT) { + _Exit(118); + } else { + exit(118); + } } ''' open('post.js', 'w').write(''' @@ -7562,7 +7567,8 @@ def test_exit_status(self): Module.callMain(); ''') self.emcc_args += ['-s', 'INVOKE_RUN=0', '--post-js', 'post.js'] - self.do_run(src, 'hello, world!\ncleanup\nI see exit status: 118') + self.do_run(src.replace('CAPITAL_EXIT', '0'), 'hello, world!\ncleanup\nI see exit status: 118') + self.do_run(src.replace('CAPITAL_EXIT', '1'), 'hello, world!\ncleanup\nI see exit status: 118') def test_noexitruntime(self): src = r''' From 5b64d4a184071e844cc45f3e84146714d2a20fa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Feb 2016 13:44:15 +0200 Subject: [PATCH 22/49] Unify programmatic test event sends in browser.test_html5_mouse. --- tests/test_html5_mouse.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_html5_mouse.c b/tests/test_html5_mouse.c index 5a9dd565e084f..01f54de3ef113 100644 --- a/tests/test_html5_mouse.c +++ b/tests/test_html5_mouse.c @@ -104,7 +104,9 @@ EM_BOOL wheel_callback(int eventType, const EmscriptenWheelEvent *e, void *userD int main() { + // Make the canvas area stand out from the background. 
emscripten_set_canvas_size(400, 300); + EM_ASM(Module['canvas'].style.backgroundColor = 'black';); EMSCRIPTEN_RESULT ret = emscripten_set_click_callback(0, 0, 1, mouse_callback); TEST_RESULT(emscripten_set_click_callback); @@ -141,10 +143,10 @@ int main() for(var d in data) event[d] = data[d]; window.dispatchEvent(event); } - sendEvent('click', { screenX: -500000, screenY: -500000, clientX: -500000, clientY: -500000, button: 0, buttons: 1 }); // Send a dummy event that should not be received. + sendEvent('click', { screenX: -500000, screenY: -500000, clientX: -500000, clientY: -500000, button: 0, buttons: 0 }); // Send a dummy event that should not be received. sendEvent('mousedown', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 1 }); sendEvent('mouseup', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 0 }); - sendEvent('dblclick', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 1 }); + sendEvent('dblclick', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 0 }); sendEvent('mousemove', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 0, 'movementX': 1, 'movementY': 1 }); sendEvent('wheel', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 0, 'deltaX': 1, 'deltaY': 1, 'deltaZ': 1, 'deltaMode': 1 }); sendEvent('mousewheel', { screenX: 1, screenY: 1, clientX: 1, clientY: 1, button: 0, buttons: 0, 'wheelDeltaX': 1, 'wheelDeltaY': 1 }); From a877f39890a4d824df676cec763db2805e3ac75d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Feb 2016 15:00:41 +0200 Subject: [PATCH 23/49] Add support and tests for building SSE3 code. 
--- emcc.py | 5 + system/include/emscripten/pmmintrin.h | 156 ++++++++++++++++++++++++++ system/include/emscripten/x86intrin.h | 20 ++++ tests/test_core.py | 15 +++ tests/test_sse3_full.cpp | 40 +++++++ 5 files changed, 236 insertions(+) create mode 100644 system/include/emscripten/pmmintrin.h create mode 100644 system/include/emscripten/x86intrin.h create mode 100644 tests/test_sse3_full.cpp diff --git a/emcc.py b/emcc.py index 3494343704799..e34beb5d37ae1 100755 --- a/emcc.py +++ b/emcc.py @@ -685,6 +685,11 @@ def validate_arg_level(level_string, max_level, err_msg): newargs.append('-D__SSE__=1') newargs.append('-D__SSE2__=1') newargs[i] = '' + elif newargs[i] == '-msse3': + newargs.append('-D__SSE__=1') + newargs.append('-D__SSE2__=1') + newargs.append('-D__SSE3__=1') + newargs[i] = '' if should_exit: sys.exit(0) diff --git a/system/include/emscripten/pmmintrin.h b/system/include/emscripten/pmmintrin.h new file mode 100644 index 0000000000000..ffb6c4830e052 --- /dev/null +++ b/system/include/emscripten/pmmintrin.h @@ -0,0 +1,156 @@ +/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __PMMINTRIN_H +#define __PMMINTRIN_H + +#include <emmintrin.h> + +#ifndef __SSE3__ +#error "SSE3 instruction set not enabled" +#endif + +/* Define the default attributes for the functions in this file. */ +#ifdef __EMSCRIPTEN__ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#else +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse3"))) +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_lddqu_si128(__m128i const *__p) +{ +#ifdef __EMSCRIPTEN__ + return _mm_loadu_si128(__p); +#else + return (__m128i)__builtin_ia32_lddqu((char const *)__p); +#endif +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_addsub_ps(__m128 __a, __m128 __b) +{ +#ifdef __EMSCRIPTEN__ + return _mm_add_ps(__a, _mm_mul_ps(__b, _mm_set_ps(1.f, -1.f, 1.f, -1.f))); +#else + return __builtin_ia32_addsubps(__a, __b); +#endif +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_hadd_ps(__m128 __a, __m128 __b) +{ +#ifdef __EMSCRIPTEN__ + return _mm_add_ps(_mm_shuffle_ps(__a, __b, _MM_SHUFFLE(2, 0, 2, 0)), _mm_shuffle_ps(__a, __b, _MM_SHUFFLE(3, 1, 3, 1))); +#else + return __builtin_ia32_haddps(__a, __b); +#endif +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_hsub_ps(__m128 __a, __m128 __b) +{ +#ifdef __EMSCRIPTEN__ + return _mm_sub_ps(_mm_shuffle_ps(__a, __b, _MM_SHUFFLE(2, 0, 2, 0)), _mm_shuffle_ps(__a, __b, _MM_SHUFFLE(3, 1, 3, 1))); +#else + return __builtin_ia32_hsubps(__a, __b); +#endif +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_movehdup_ps(__m128 __a) +{ + return __builtin_shufflevector(__a, __a, 1, 1, 3, 3); +} + +static __inline__ __m128
__DEFAULT_FN_ATTRS +_mm_moveldup_ps(__m128 __a) +{ + return __builtin_shufflevector(__a, __a, 0, 0, 2, 2); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_addsub_pd(__m128d __a, __m128d __b) +{ +#ifdef __EMSCRIPTEN__ + return _mm_add_pd(__a, _mm_mul_pd(__b, _mm_set_pd(1.0, -1.0))); +#else + return __builtin_ia32_addsubpd(__a, __b); +#endif +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_hadd_pd(__m128d __a, __m128d __b) +{ +#ifdef __EMSCRIPTEN__ + return _mm_add_pd(_mm_shuffle_pd(__a, __b, _MM_SHUFFLE2(0, 0)), _mm_shuffle_pd(__a, __b, _MM_SHUFFLE2(1, 1))); +#else + return __builtin_ia32_haddpd(__a, __b); +#endif +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_hsub_pd(__m128d __a, __m128d __b) +{ +#ifdef __EMSCRIPTEN__ + return _mm_sub_pd(_mm_shuffle_pd(__a, __b, _MM_SHUFFLE2(0, 0)), _mm_shuffle_pd(__a, __b, _MM_SHUFFLE2(1, 1))); +#else + return __builtin_ia32_hsubpd(__a, __b); +#endif +} + +#define _mm_loaddup_pd(dp) _mm_load1_pd(dp) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_movedup_pd(__m128d __a) +{ + return __builtin_shufflevector(__a, __a, 0, 0); +} + +#define _MM_DENORMALS_ZERO_ON (0x0040) +#define _MM_DENORMALS_ZERO_OFF (0x0000) + +#define _MM_DENORMALS_ZERO_MASK (0x0040) + +#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) +#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) + +#ifndef __EMSCRIPTEN__ + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints) +{ + __builtin_ia32_monitor((void *)__p, __extensions, __hints); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mwait(unsigned __extensions, unsigned __hints) +{ + __builtin_ia32_mwait(__extensions, __hints); +} + +#endif /* __EMSCRIPTEN__ */ + +#undef __DEFAULT_FN_ATTRS + +#endif /* __PMMINTRIN_H */ diff --git a/system/include/emscripten/x86intrin.h b/system/include/emscripten/x86intrin.h new file mode 100644 index 
0000000000000..cdd4a50c83220 --- /dev/null +++ b/system/include/emscripten/x86intrin.h @@ -0,0 +1,20 @@ +#ifndef __X86INTRIN_H +#define __X86INTRIN_H + +// x86intrin.h is the standard include-all for all supported intrinsics. + +#if __SSE__ +#include <xmmintrin.h> +#else +#warning x86intrin.h included without SIMD.js support enabled. +#endif + +#if __SSE2__ +#include <emmintrin.h> +#endif + +#if __SSE3__ +#include <pmmintrin.h> +#endif + +#endif diff --git a/tests/test_core.py b/tests/test_core.py index d3d5a5296cf99..c49ffb6696b66 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -5840,6 +5840,21 @@ def test_sse2_full(self): self.emcc_args = orig_args + mode + ['-I' + path_from_root('tests'), '-msse2'] + args self.do_run(open(path_from_root('tests', 'test_sse2_full.cpp'), 'r').read(), native_result) + # Tests the full SSE3 API. + @SIMD + def test_sse3_full(self): + args = [] + if '-O0' in self.emcc_args: args += ['-D_DEBUG=1'] + Popen([CLANG, path_from_root('tests', 'test_sse3_full.cpp'), '-o', 'test_sse3_full', '-D_CRT_SECURE_NO_WARNINGS=1', '-msse3'] + args + get_clang_native_args(), env=get_clang_native_env(), stdout=PIPE).communicate() + native_result, err = Popen('./test_sse3_full', stdout=PIPE).communicate() + native_result = native_result.replace('\r\n', '\n') # Windows line endings fix + + Settings.PRECISE_F32 = 1 # SIMD currently requires Math.fround + orig_args = self.emcc_args + for mode in [[], ['-s', 'SIMD=1']]: + self.emcc_args = orig_args + mode + ['-I' + path_from_root('tests'), '-msse3'] + args + self.do_run(open(path_from_root('tests', 'test_sse3_full.cpp'), 'r').read(), native_result) + @SIMD def test_simd(self): test_path = path_from_root('tests', 'core', 'test_simd') diff --git a/tests/test_sse3_full.cpp b/tests/test_sse3_full.cpp new file mode 100644 index 0000000000000..c0e7db2d65c37 --- /dev/null +++ b/tests/test_sse3_full.cpp @@ -0,0 +1,40 @@ +// This file uses SSE3 by calling different functions with different interesting inputs and prints the results.
+// Use a diff tool to compare the results between platforms. + +#include <pmmintrin.h> +#define ENABLE_SSE2 +#include "test_sse_full.h" + +#ifndef _DEBUG +// The following tests break when optimizer is applied, so disable them for now. Baby steps. +// See https://github.com/kripken/emscripten/issues/3789 +#define BREAKS_UNDER_OPTIMIZATION +#endif + +float *interesting_floats = get_interesting_floats(); +int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]); +uint32_t *interesting_ints = get_interesting_ints(); +int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]); +double *interesting_doubles = get_interesting_doubles(); +int numInterestingDoubles = sizeof(interesting_doubles_)/sizeof(interesting_doubles_[0]); + +int main() +{ + assert(numInterestingFloats % 4 == 0); + assert(numInterestingInts % 4 == 0); + assert(numInterestingDoubles % 4 == 0); + + Ret_M128d_M128d(__m128d, _mm_addsub_pd); + Ret_M128_M128(__m128, _mm_addsub_ps); + Ret_M128d_M128d(__m128d, _mm_hadd_pd); + Ret_M128_M128(__m128, _mm_hadd_ps); + Ret_M128d_M128d(__m128d, _mm_hsub_pd); + Ret_M128_M128(__m128, _mm_hsub_ps); +#ifndef BREAKS_UNDER_OPTIMIZATION + Ret_IntPtr(__m128i, _mm_lddqu_si128, __m128i*, 4, 1); +#endif + Ret_DoublePtr(__m128d, _mm_loaddup_pd, 1, 1); + Ret_M128d(__m128d, _mm_movedup_pd); + Ret_M128(__m128, _mm_movehdup_ps); + Ret_M128(__m128, _mm_moveldup_ps); +} From 2a9499022aeca04dbf09b80ba9bf41652f4cc236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Feb 2016 15:03:18 +0200 Subject: [PATCH 24/49] Update docs on SSE3 SIMD.js support.
--- site/source/docs/porting/simd.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/site/source/docs/porting/simd.rst b/site/source/docs/porting/simd.rst index 6291d90ef6531..05bbc5aabd699 100644 --- a/site/source/docs/porting/simd.rst +++ b/site/source/docs/porting/simd.rst @@ -17,7 +17,7 @@ There are three different ways to generate code to benefit from SIMD instruction - Emscripten supports the GCC/Clang compiler specific `SIMD Vector Extensions `_. These constructs do not require any changes to the command line build flags, but any code that utilizes the vector built-ins will always unconditionally emit SIMD.js vector instructions. -- A third option is to use the x86 SSE intrinsics. Emscripten has full support for compiling code that utilizes the SSE1 and SSE2 intrinsic function calls. To enable SSE1 intrinsics support, pass the compiler flag -msse, and add in a #include <xmmintrin.h>. To build SSE2 intrinsics code, pass the compiler flag -msse2, and use #include <emmintrin.h>. +- A third option is to use the x86 SSE intrinsics. Emscripten has full support for compiling code that utilizes the SSE1, SSE2 and SSE3 intrinsic function calls. To enable SSE1 intrinsics support, pass the compiler flag -msse, and add in a #include <xmmintrin.h>. To build SSE2 intrinsics code, pass the compiler flag -msse2, and use #include <emmintrin.h>. For SSE3, pass -msse3 and #include <pmmintrin.h>. These three methods are not mutually exclusive, but may freely be combined. @@ -30,9 +30,9 @@ When porting native SIMD code, it should be noted that because of portability co - The SIMD types supported by SIMD.js are Float32x4, Int32x4, Uint32x4, Int16x8, Uint16x8, Int8x16 and Uint8x16. In particular, Float64x2 and Int64x2 are currently not supported, however Float64x2 is emulated in software in the current polyfill. 256-bit or wider SIMD types (AVX) are not supported either.
- - Even though the full set of SSE1 and SSE2 intrinsics are supported, because of the platform-abstract nature of SIMD.js, some of these intrinsics will compile down to scalarized instructions to emulate. To verify which instructions are accelerated and which are not, examine the code in the platform headers `xmmintrin.h `_ and `emmintrin.h `_. + - Even though the full set of SSE1, SSE2 and SSE3 intrinsics are supported, because of the platform-abstract nature of SIMD.js, some of these intrinsics will compile down to scalarized instructions to emulate. To verify which instructions are accelerated and which are not, examine the code in the platform headers `xmmintrin.h `_ and `emmintrin.h `_. - - Currently the Intel x86 SIMD support is limited to SSE1 and SSE2 instruction sets. The Intel x86 SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and FMA instruction sets or newer are not supported. Also, the old Intel x86 MMX instruction set is not supported. + - Currently the Intel x86 SIMD support is limited to SSE1, SSE2 and SSE3 instruction sets. The Intel x86 SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and FMA instruction sets or newer are not supported. Also, the old Intel x86 MMX instruction set is not supported. - SIMD.js does not have control over managing floating point rounding modes or handling denormals. From 617d704860b95ab4eb918d5606888483dbaebbdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Feb 2016 19:24:54 +0200 Subject: [PATCH 25/49] Add support and tests for building SSSE3 code. 
--- emcc.py | 6 + system/include/emscripten/tmmintrin.h | 362 ++++++++++++++++++++++++++ system/include/emscripten/x86intrin.h | 4 + tests/test_core.py | 14 + tests/test_sse_full.h | 15 ++ tests/test_ssse3_full.cpp | 37 +++ 6 files changed, 438 insertions(+) create mode 100644 system/include/emscripten/tmmintrin.h create mode 100644 tests/test_ssse3_full.cpp diff --git a/emcc.py b/emcc.py index e34beb5d37ae1..71fd9545f620c 100755 --- a/emcc.py +++ b/emcc.py @@ -690,6 +690,12 @@ def validate_arg_level(level_string, max_level, err_msg): newargs.append('-D__SSE2__=1') newargs.append('-D__SSE3__=1') newargs[i] = '' + elif newargs[i] == '-mssse3': + newargs.append('-D__SSE__=1') + newargs.append('-D__SSE2__=1') + newargs.append('-D__SSE3__=1') + newargs.append('-D__SSSE3__=1') + newargs[i] = '' if should_exit: sys.exit(0) diff --git a/system/include/emscripten/tmmintrin.h b/system/include/emscripten/tmmintrin.h new file mode 100644 index 0000000000000..331ad212e718c --- /dev/null +++ b/system/include/emscripten/tmmintrin.h @@ -0,0 +1,362 @@ +/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __TMMINTRIN_H +#define __TMMINTRIN_H + +#include <pmmintrin.h> + +#ifndef __SSSE3__ +#error "SSSE3 instruction set not enabled" +#endif + +/* Define the default attributes for the functions in this file. */ +#ifdef __EMSCRIPTEN__ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#else +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"))) +#endif + +#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */ +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_abs_pi8(__m64 __a) +{ + return (__m64)__builtin_ia32_pabsb((__v8qi)__a); +} +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_abs_epi8(__m128i __a) +{ +#ifdef __EMSCRIPTEN__ + __m128i __mask = (__m128i)emscripten_int8x16_shiftRightByScalar((int8x16)__a, 7); + return _mm_xor_si128(_mm_add_epi8(__a, __mask), __mask); +#else + return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); +#endif +} + +#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */ +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_abs_pi16(__m64 __a) +{ + return (__m64)__builtin_ia32_pabsw((__v4hi)__a); +} +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_abs_epi16(__m128i __a) +{ +#ifdef __EMSCRIPTEN__ + __m128i __mask = (__m128i)emscripten_int16x8_shiftRightByScalar((int16x8)__a, 15); + return _mm_xor_si128(_mm_add_epi16(__a, __mask), __mask); +#else + return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); +#endif +} + +#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten.
*/ +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_abs_pi32(__m64 __a) +{ + return (__m64)__builtin_ia32_pabsd((__v2si)__a); +} +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_abs_epi32(__m128i __a) +{ +#ifdef __EMSCRIPTEN__ + __m128i __mask = (__m128i)emscripten_int32x4_shiftRightByScalar((int32x4)__a, 31); + return _mm_xor_si128(_mm_add_epi32(__a, __mask), __mask); +#else + return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); +#endif +} + +#ifdef __EMSCRIPTEN__ +#define _mm_alignr_epi8(__a, __b, __count) \ + ((__count <= 16) \ + ? (_mm_or_si128(_mm_bslli_si128((__a), 16 - (((unsigned int)(__count)) & 0xFF)), _mm_bsrli_si128((__b), (((unsigned int)(__count)) & 0xFF)))) \ + : (_mm_bsrli_si128((__a), (((unsigned int)(__count)) & 0xFF) - 16))) +#else +#define _mm_alignr_epi8(a, b, n) __extension__ ({ \ + (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), (n)); }) +#endif + +#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */ +#define _mm_alignr_pi8(a, b, n) __extension__ ({ \ + (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)); }) +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hadd_epi16(__m128i __a, __m128i __b) +{ +#ifdef __EMSCRIPTEN__ + return _mm_add_epi16(__builtin_shufflevector((int16x8)__a, (int16x8)__b, 0, 2, 4, 6, 8, 10, 12, 14), __builtin_shufflevector((int16x8)__a, (int16x8)__b, 1, 3, 5, 7, 9, 11, 13, 15)); +#else + return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hadd_epi32(__m128i __a, __m128i __b) +{ +#ifdef __EMSCRIPTEN__ + return _mm_add_epi32(__builtin_shufflevector(__a, __b, 0, 2, 4, 6), __builtin_shufflevector(__a, __b, 1, 3, 5, 7)); +#else + return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); +#endif +} + +#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. 
*/ +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_hadd_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_hadd_pi32(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); +} +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hadds_epi16(__m128i __a, __m128i __b) +{ +#ifdef __EMSCRIPTEN__ + return (__m128i)emscripten_int16x8_addSaturate(__builtin_shufflevector((int16x8)__a, (int16x8)__b, 0, 2, 4, 6, 8, 10, 12, 14), __builtin_shufflevector((int16x8)__a, (int16x8)__b, 1, 3, 5, 7, 9, 11, 13, 15)); +#else + return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); +#endif +} + +#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */ +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_hadds_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); +} +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hsub_epi16(__m128i __a, __m128i __b) +{ +#ifdef __EMSCRIPTEN__ + return _mm_sub_epi16(__builtin_shufflevector((int16x8)__a, (int16x8)__b, 0, 2, 4, 6, 8, 10, 12, 14), __builtin_shufflevector((int16x8)__a, (int16x8)__b, 1, 3, 5, 7, 9, 11, 13, 15)); +#else + return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hsub_epi32(__m128i __a, __m128i __b) +{ +#ifdef __EMSCRIPTEN__ + return _mm_sub_epi32(__builtin_shufflevector(__a, __b, 0, 2, 4, 6), __builtin_shufflevector(__a, __b, 1, 3, 5, 7)); +#else + return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); +#endif +} + +#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. 
*/ +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_hsub_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_hsub_pi32(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); +} +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_hsubs_epi16(__m128i __a, __m128i __b) +{ +#ifdef __EMSCRIPTEN__ + return (__m128i)emscripten_int16x8_subSaturate(__builtin_shufflevector((int16x8)__a, (int16x8)__b, 0, 2, 4, 6, 8, 10, 12, 14), __builtin_shufflevector((int16x8)__a, (int16x8)__b, 1, 3, 5, 7, 9, 11, 13, 15)); +#else + return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); +#endif +} + +#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */ +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_hsubs_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); +} +#endif + +#ifdef __EMSCRIPTEN__ +static __inline__ short __DEFAULT_FN_ATTRS +__Saturate_To_Int16(int __x) +{ + return __x <= -32768 ? -32768 : (__x >= 32767 ? 32767 : __x); +} +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maddubs_epi16(__m128i __a, __m128i __b) +{ +#ifdef __EMSCRIPTEN__ + union { + char __x[16]; + short __s[8]; + __m128i __m; + } __src, __src2, __dst; + __src.__m = __a; + __src2.__m = __b; + for(int __i = 0; __i < 16; __i += 2) + __dst.__s[__i>>1] = __Saturate_To_Int16((unsigned char)__src.__x[__i+1] * __src2.__x[__i+1] + (unsigned char)__src.__x[__i] * __src2.__x[__i]); + return __dst.__m; +#else + return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); +#endif +} + +#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. 
*/ +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_maddubs_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); +} +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mulhrs_epi16(__m128i __a, __m128i __b) +{ +#ifdef __EMSCRIPTEN__ + union { + short __x[8]; + __m128i __m; + } __src, __src2, __dst; + __src.__m = __a; + __src2.__m = __b; + for(int __i = 0; __i < 8; ++__i) + __dst.__x[__i] = (((__src.__x[__i] * __src2.__x[__i]) >> 14) + 1) >> 1; + return __dst.__m; +#else + return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); +#endif +} + +#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. */ +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_mulhrs_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); +} +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_shuffle_epi8(__m128i __a, __m128i __b) +{ +#ifdef __EMSCRIPTEN__ + union { + unsigned char __x[16]; + __m128i __m; + } __src, __src2, __dst; + __src.__m = __a; + __src2.__m = __b; + for(int __i = 0; __i < 16; ++__i) + __dst.__x[__i] = (__src2.__x[__i] & 0x80) ? 0 : __src.__x[__src2.__x[__i]&15]; + return __dst.__m; +#else + return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); +#endif +} + +#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. 
*/ +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_shuffle_pi8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); +} +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sign_epi8(__m128i __a, __m128i __b) +{ +#ifdef __EMSCRIPTEN__ + __m128i __mask = (__m128i)emscripten_int8x16_shiftRightByScalar((int8x16)__b, 7); + __m128i __zeromask = (__m128i)emscripten_int8x16_notEqual((int8x16)__b, emscripten_int8x16_splat(0)); + return _mm_and_si128(__zeromask, _mm_xor_si128(_mm_add_epi8(__a, __mask), __mask)); +#else + return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sign_epi16(__m128i __a, __m128i __b) +{ +#ifdef __EMSCRIPTEN__ + __m128i __mask = (__m128i)emscripten_int16x8_shiftRightByScalar((int16x8)__b, 15); + __m128i __zeromask = (__m128i)emscripten_int16x8_notEqual((int16x8)__b, emscripten_int16x8_splat(0)); + return _mm_and_si128(__zeromask, _mm_xor_si128(_mm_add_epi16(__a, __mask), __mask)); +#else + return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sign_epi32(__m128i __a, __m128i __b) +{ +#ifdef __EMSCRIPTEN__ + __m128i __mask = (__m128i)emscripten_int32x4_shiftRightByScalar((int32x4)__b, 31); + __m128i __zeromask = (__m128i)emscripten_int32x4_notEqual((int32x4)__b, emscripten_int32x4_splat(0)); + return _mm_and_si128(__zeromask, _mm_xor_si128(_mm_add_epi32(__a, __mask), __mask)); +#else + return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); +#endif +} + +#ifndef __EMSCRIPTEN__ /* MMX registers/__m64 type is not available in Emscripten. 
*/ +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sign_pi8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sign_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __DEFAULT_FN_ATTRS +_mm_sign_pi32(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); +} +#endif + +#undef __DEFAULT_FN_ATTRS + +#endif /* __TMMINTRIN_H */ diff --git a/system/include/emscripten/x86intrin.h b/system/include/emscripten/x86intrin.h index cdd4a50c83220..3c0df78fc7041 100644 --- a/system/include/emscripten/x86intrin.h +++ b/system/include/emscripten/x86intrin.h @@ -17,4 +17,8 @@ #include <pmmintrin.h> #endif +#if __SSSE3__ +#include <tmmintrin.h> +#endif + #endif diff --git a/tests/test_core.py b/tests/test_core.py index c49ffb6696b66..3fe190b24a645 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -5855,6 +5855,20 @@ def test_sse3_full(self): self.emcc_args = orig_args + mode + ['-I' + path_from_root('tests'), '-msse3'] + args self.do_run(open(path_from_root('tests', 'test_sse3_full.cpp'), 'r').read(), native_result) + @SIMD + def test_ssse3_full(self): + args = [] + if '-O0' in self.emcc_args: args += ['-D_DEBUG=1'] + Popen([CLANG, path_from_root('tests', 'test_ssse3_full.cpp'), '-o', 'test_ssse3_full', '-D_CRT_SECURE_NO_WARNINGS=1', '-mssse3'] + args + get_clang_native_args(), env=get_clang_native_env(), stdout=PIPE).communicate() + native_result, err = Popen('./test_ssse3_full', stdout=PIPE).communicate() + native_result = native_result.replace('\r\n', '\n') # Windows line endings fix + + Settings.PRECISE_F32 = 1 # SIMD currently requires Math.fround + orig_args = self.emcc_args + for mode in [[], ['-s', 'SIMD=1']]: + self.emcc_args = orig_args + mode + ['-I' + path_from_root('tests'), '-mssse3'] + args + self.do_run(open(path_from_root('tests', 'test_ssse3_full.cpp'), 'r').read(), native_result) +
@SIMD def test_simd(self): test_path = path_from_root('tests', 'core', 'test_simd') diff --git a/tests/test_sse_full.h b/tests/test_sse_full.h index 646b2b340ab37..f4de83aec273d 100644 --- a/tests/test_sse_full.h +++ b/tests/test_sse_full.h @@ -313,6 +313,20 @@ __m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) printf("%s(%s, %s, %d) = %s\n", #func, str, str2, Tint, str3); \ } +#define Ret_M128i_M128i_Tint_body(Ret_type, func, Tint) \ + for(int i = 0; i < numInterestingInts / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; j < numInterestingInts / 4; ++j) \ + { \ + __m128i m1 = E1(interesting_ints, i*4+k, numInterestingInts); \ + __m128i m2 = E2(interesting_ints, j*4, numInterestingInts); \ + Ret_type ret = func(m1, m2, Tint); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s, %d) = %s\n", #func, str, str2, Tint, str3); \ + } + #define Ret_M128_M128_Tint_body(Ret_type, func, Tint) \ for(int i = 0; i < numInterestingFloats / 4; ++i) \ for(int k = 0; k < 4; ++k) \ @@ -356,6 +370,7 @@ __m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) #define Ret_M128i_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128i_Tint_body, func) #define Ret_M128i_int_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128i_int_Tint_body, func) +#define Ret_M128i_M128i_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128i_M128i_Tint_body, func) #define Ret_M128d_M128d_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128d_M128d_Tint_body, func) #define Ret_M128_M128_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128_M128_Tint_body, func) diff --git a/tests/test_ssse3_full.cpp b/tests/test_ssse3_full.cpp new file mode 100644 index 0000000000000..6afc1d4bc47bb --- /dev/null +++ b/tests/test_ssse3_full.cpp @@ -0,0 +1,37 @@ +// This file uses SSSE3 by calling different functions with different interesting inputs and prints the results. 
+// Use a diff tool to compare the results between platforms. + +#include +#define ENABLE_SSE2 +#include "test_sse_full.h" + +float *interesting_floats = get_interesting_floats(); +int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]); +uint32_t *interesting_ints = get_interesting_ints(); +int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]); +double *interesting_doubles = get_interesting_doubles(); +int numInterestingDoubles = sizeof(interesting_doubles_)/sizeof(interesting_doubles_[0]); + +int main() +{ + assert(numInterestingFloats % 4 == 0); + assert(numInterestingInts % 4 == 0); + assert(numInterestingDoubles % 4 == 0); + + Ret_M128i(__m128i, _mm_abs_epi8); + Ret_M128i(__m128i, _mm_abs_epi16); + Ret_M128i(__m128i, _mm_abs_epi32); + Ret_M128i_M128i_Tint(__m128i, _mm_alignr_epi8); + M128i_M128i_M128i(_mm_hadd_epi16); + M128i_M128i_M128i(_mm_hadd_epi32); + M128i_M128i_M128i(_mm_hadds_epi16); + M128i_M128i_M128i(_mm_hsub_epi16); + M128i_M128i_M128i(_mm_hsub_epi32); + M128i_M128i_M128i(_mm_hsubs_epi16); + M128i_M128i_M128i(_mm_maddubs_epi16); + M128i_M128i_M128i(_mm_mulhrs_epi16); + M128i_M128i_M128i(_mm_shuffle_epi8); + M128i_M128i_M128i(_mm_sign_epi16); + M128i_M128i_M128i(_mm_sign_epi32); + M128i_M128i_M128i(_mm_sign_epi8); +} From dff1e2ed81d437636bd523529afb8f1883d00f6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Feb 2016 19:26:24 +0200 Subject: [PATCH 26/49] Update docs for SSSE3. --- site/source/docs/porting/simd.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/site/source/docs/porting/simd.rst b/site/source/docs/porting/simd.rst index 05bbc5aabd699..8f00cb776e83b 100644 --- a/site/source/docs/porting/simd.rst +++ b/site/source/docs/porting/simd.rst @@ -17,7 +17,7 @@ There are three different ways to generate code to benefit from SIMD instruction - Emscripten supports the GCC/Clang compiler specific `SIMD Vector Extensions `_. 
These constructs do not require any changes to the command line build flags, but any code that utilizes the vector built-ins will always unconditionally emit SIMD.js vector instructions. -- A third option is to use the x86 SSE intrinsics. Emscripten has full support for compiling code that utilizes the SSE1, SSE2 and SSE3 intrinsic function calls. To enable SSE1 intrinsics support, pass the compiler flag -msse, and add in a #include <xmmintrin.h>. To build SSE2 intrinsics code, pass the compiler flag -msse2, and use #include <emmintrin.h>. For SSE3, pass -msse3 and #include <pmmintrin.h>. +- A third option is to use the x86 SSE intrinsics. Emscripten has full support for compiling code that utilizes the SSE1, SSE2, SSE3 and SSSE3 intrinsic function calls. To enable SSE1 intrinsics support, pass the compiler flag -msse, and add in a #include <xmmintrin.h>. To build SSE2 intrinsics code, pass the compiler flag -msse2, and use #include <emmintrin.h>. For SSE3, pass -msse3 and #include <pmmintrin.h>, and for SSSE3, pass -mssse3 and #include <tmmintrin.h>. These three methods are not mutually exclusive, but may freely be combined. @@ -30,9 +30,9 @@ When porting native SIMD code, it should be noted that because of portability co - The SIMD types supported by SIMD.js are Float32x4, Int32x4, Uint32x4, Int16x8, Uint16x8, Int8x16 and Uint8x16. In particular, Float64x2 and Int64x2 are currently not supported, however Float64x2 is emulated in software in the current polyfill. 256-bit or wider SIMD types (AVX) are not supported either. - - Even though the full set of SSE1, SSE2 and SSE3 intrinsics are supported, because of the platform-abstract nature of SIMD.js, some of these intrinsics will compile down to scalarized instructions to emulate. To verify which instructions are accelerated and which are not, examine the code in the platform headers `xmmintrin.h `_ and `emmintrin.h `_.
+ - Even though the full set of SSE1, SSE2, SSE3 and SSSE3 intrinsics are supported, because of the platform-abstract nature of SIMD.js, some of these intrinsics will compile down to scalarized instructions to emulate. To verify which instructions are accelerated and which are not, examine the code in the platform headers `xmmintrin.h `_ and `emmintrin.h `_. - - Currently the Intel x86 SIMD support is limited to SSE1, SSE2 and SSE3 instruction sets. The Intel x86 SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and FMA instruction sets or newer are not supported. Also, the old Intel x86 MMX instruction set is not supported. + - Currently the Intel x86 SIMD support is limited to SSE1, SSE2, SSE3 and SSSE3 instruction sets. The Intel x86 SSE4.1, SSE4.2, AVX, AVX2 and FMA instruction sets or newer are not supported. Also, the old Intel x86 MMX instruction set is not supported. - SIMD.js does not have control over managing floating point rounding modes or handling denormals. From 0dcf75e67378c329ece4c3ba156681a92e9c9916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Feb 2016 20:37:47 +0200 Subject: [PATCH 27/49] Add partial SSE 4.1 capability to support #4030. 
--- emcc.py | 7 + system/include/emscripten/emmintrin.h | 7 +- system/include/emscripten/smmintrin.h | 628 ++++++++++++++++++++++++++ system/include/emscripten/x86intrin.h | 4 + tests/test_core.py | 14 + tests/test_sse4_1_full.cpp | 104 +++++ tests/test_sse_full.h | 12 + 7 files changed, 773 insertions(+), 3 deletions(-) create mode 100644 system/include/emscripten/smmintrin.h create mode 100644 tests/test_sse4_1_full.cpp diff --git a/emcc.py b/emcc.py index 71fd9545f620c..06d2cb6154e21 100755 --- a/emcc.py +++ b/emcc.py @@ -696,6 +696,13 @@ def validate_arg_level(level_string, max_level, err_msg): newargs.append('-D__SSE3__=1') newargs.append('-D__SSSE3__=1') newargs[i] = '' + elif newargs[i] == '-msse4.1': + newargs.append('-D__SSE__=1') + newargs.append('-D__SSE2__=1') + newargs.append('-D__SSE3__=1') + newargs.append('-D__SSSE3__=1') + newargs.append('-D__SSE4_1__=1') + newargs[i] = '' if should_exit: sys.exit(0) diff --git a/system/include/emscripten/emmintrin.h b/system/include/emscripten/emmintrin.h index aecf7d7109877..d216dd5377a32 100644 --- a/system/include/emscripten/emmintrin.h +++ b/system/include/emscripten/emmintrin.h @@ -64,6 +64,10 @@ typedef long long __v2di __attribute__ ((__vector_size__ (16))); typedef short __v8hi __attribute__((__vector_size__(16))); typedef char __v16qi __attribute__((__vector_size__(16))); +/* We need an explicitly signed variant for char. Note that this shouldn't + * appear in the interface though. */ +typedef signed char __v16qs __attribute__((__vector_size__(16))); + static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_add_sd(__m128d __a, __m128d __b) { @@ -1660,9 +1664,6 @@ _mm_cmpeq_epi32(__m128i __a, __m128i __b) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_epi8(__m128i __a, __m128i __b) { - /* This function always performs a signed comparison, but __v16qi is a char - which may be signed or unsigned. 
*/ - typedef signed char __v16qs __attribute__((__vector_size__(16))); return (__m128i)((__v16qs)__a > (__v16qs)__b); } diff --git a/system/include/emscripten/smmintrin.h b/system/include/emscripten/smmintrin.h new file mode 100644 index 0000000000000..10b5af1243d7b --- /dev/null +++ b/system/include/emscripten/smmintrin.h @@ -0,0 +1,628 @@ +/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _SMMINTRIN_H +#define _SMMINTRIN_H + +#include + +/* Define the default attributes for the functions in this file. */ +#ifdef __EMSCRIPTEN__ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#else +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"))) +#endif + +/* SSE4 Rounding macros. 
*/ +#define _MM_FROUND_TO_NEAREST_INT 0x00 +#define _MM_FROUND_TO_NEG_INF 0x01 +#define _MM_FROUND_TO_POS_INF 0x02 +#define _MM_FROUND_TO_ZERO 0x03 +#define _MM_FROUND_CUR_DIRECTION 0x04 + +#define _MM_FROUND_RAISE_EXC 0x00 +#define _MM_FROUND_NO_EXC 0x08 + +#define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT) +#define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF) +#define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF) +#define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO) +#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION) +#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION) + +#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL) +#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL) +#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL) +#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL) + +#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR) +#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR) +#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR) +#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR) + +#define _mm_round_ps(X, M) __extension__ ({ \ + (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)); }) + +#define _mm_round_ss(X, Y, M) __extension__ ({ \ + (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (M)); }) + +#define _mm_round_pd(X, M) __extension__ ({ \ + (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); }) + +#define _mm_round_sd(X, Y, M) __extension__ ({ \ + (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (M)); }) + +/* SSE4 Packed Blending Intrinsics. */ +#define _mm_blend_pd(V1, V2, M) __extension__ ({ \ + (__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \ + (__v2df)(__m128d)(V2), \ + (((M) & 0x01) ? 2 : 0), \ + (((M) & 0x02) ? 
3 : 1)); }) + +#define _mm_blend_ps(V1, V2, M) __extension__ ({ \ + (__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \ + (((M) & 0x01) ? 4 : 0), \ + (((M) & 0x02) ? 5 : 1), \ + (((M) & 0x04) ? 6 : 2), \ + (((M) & 0x08) ? 7 : 3)); }) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, + (__v2df)__M); +#endif +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, + (__v4sf)__M); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, + (__v16qi)__M); +#endif +} + +#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \ + (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \ + (__v8hi)(__m128i)(V2), \ + (((M) & 0x01) ? 8 : 0), \ + (((M) & 0x02) ? 9 : 1), \ + (((M) & 0x04) ? 10 : 2), \ + (((M) & 0x08) ? 11 : 3), \ + (((M) & 0x10) ? 12 : 4), \ + (((M) & 0x20) ? 13 : 5), \ + (((M) & 0x40) ? 14 : 6), \ + (((M) & 0x80) ? 15 : 7)); }) + +/* SSE4 Dword Multiply Instructions. */ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mullo_epi32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) ((__v4si)__V1 * (__v4si)__V2); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mul_epi32 (__m128i __V1, __m128i __V2) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2); +#endif +} + +/* SSE4 Floating Point Dot Product Instructions. 
*/ +#define _mm_dp_ps(X, Y, M) __extension__ ({ \ + (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (M)); }) + +#define _mm_dp_pd(X, Y, M) __extension__ ({\ + (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (M)); }) + +/* SSE4 Streaming Load Hint Instruction. */ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_stream_load_si128 (__m128i const *__V) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return (__m128i) __builtin_ia32_movntdqa ((const __v2di *) __V); +#endif +} + +/* SSE4 Packed Integer Min/Max Instructions. */ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_min_epi8 (__m128i __V1, __m128i __V2) +{ +#ifdef __EMSCRIPTEN__ + return _mm_xor_si128(__V2, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi8(__V1, __V2))); +#else + return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_max_epi8 (__m128i __V1, __m128i __V2) +{ +#ifdef __EMSCRIPTEN__ + return _mm_xor_si128(__V1, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi8(__V1, __V2))); +#else + return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_min_epu16 (__m128i __V1, __m128i __V2) +{ +#ifdef __EMSCRIPTEN__ + __m128 __shift = (__m128)emscripten_int16x8_splat(-32768); + return _mm_xor_si128(__V2, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi16(_mm_sub_epi16(__V1, __shift), _mm_sub_epi16(__V2, __shift)))); +#else + return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_max_epu16 (__m128i __V1, __m128i __V2) +{ +#ifdef __EMSCRIPTEN__ + __m128 __shift = (__m128)emscripten_int16x8_splat(-32768); + return _mm_xor_si128(__V1, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi16(_mm_sub_epi16(__V1, __shift), _mm_sub_epi16(__V2, __shift)))); +#else + return (__m128i) 
__builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_min_epi32 (__m128i __V1, __m128i __V2) +{ +#ifdef __EMSCRIPTEN__ + return _mm_xor_si128(__V2, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi32(__V1, __V2))); +#else + return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_max_epi32 (__m128i __V1, __m128i __V2) +{ +#ifdef __EMSCRIPTEN__ + return _mm_xor_si128(__V1, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi32(__V1, __V2))); +#else + return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_min_epu32 (__m128i __V1, __m128i __V2) +{ +#ifdef __EMSCRIPTEN__ + __m128i __shift = (__m128i)emscripten_int32x4_splat((int)0x80000000U); /* bias each 32-bit lane by 2^31 so the signed compare below orders the lanes as unsigned */ + return _mm_xor_si128(__V2, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi32(_mm_sub_epi32(__V1, __shift), _mm_sub_epi32(__V2, __shift)))); /* selects __V1 where biased __V1 < biased __V2, else __V2 */ +#else + return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_max_epu32 (__m128i __V1, __m128i __V2) +{ +#ifdef __EMSCRIPTEN__ + __m128i __shift = (__m128i)emscripten_int32x4_splat((int)0x80000000U); /* bias each 32-bit lane by 2^31 so the signed compare below orders the lanes as unsigned */ + return _mm_xor_si128(__V1, _mm_and_si128(_mm_xor_si128(__V1, __V2), _mm_cmplt_epi32(_mm_sub_epi32(__V1, __shift), _mm_sub_epi32(__V2, __shift)))); /* selects __V2 where biased __V1 < biased __V2, else __V1 */ +#else + return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2); +#endif +} + +/* SSE4 Insertion and Extraction from XMM Register Instructions. */ +#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N)) +#define _mm_extract_ps(X, N) (__extension__ \ + ({ union { int __i; float __f; } __t; \ + __v4sf __a = (__v4sf)(__m128)(X); \ + __t.__f = __a[(N) & 3]; \ + __t.__i;})) + +/* Miscellaneous insert and extract macros. */ +/* Extract a single-precision float from X at index N into D.
*/ +#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \ + (D) = __a[N]; })) + +/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create + an index suitable for _mm_insert_ps. */ +#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z)) + +/* Extract a float from X at index N into the first index of the return. */ +#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \ + _MM_MK_INSERTPS_NDX((N), 0, 0x0e)) + +/* Insert int into packed integer array at index. */ +#define _mm_insert_epi8(X, I, N) (__extension__ \ + ({ __v16qi __a = (__v16qi)(__m128i)(X); \ + __a[(N) & 15] = (I); \ + __a;})) +#define _mm_insert_epi32(X, I, N) (__extension__ \ + ({ __v4si __a = (__v4si)(__m128i)(X); \ + __a[(N) & 3] = (I); \ + __a;})) +#ifdef __x86_64__ +#define _mm_insert_epi64(X, I, N) (__extension__ \ + ({ __v2di __a = (__v2di)(__m128i)(X); \ + __a[(N) & 1] = (I); \ + __a;})) +#endif /* __x86_64__ */ + +/* Extract int from packed integer array at index. This returns the element + * as a zero extended value, so it is unsigned. + */ +#define _mm_extract_epi8(X, N) (__extension__ \ + ({ __v16qi __a = (__v16qi)(__m128i)(X); \ + (int)(unsigned char) __a[(N) & 15];})) +#define _mm_extract_epi32(X, N) (__extension__ \ + ({ __v4si __a = (__v4si)(__m128i)(X); \ + (int)__a[(N) & 3];})) +#ifdef __x86_64__ +#define _mm_extract_epi64(X, N) (__extension__ \ + ({ __v2di __a = (__v2di)(__m128i)(X); \ + (long long)__a[(N) & 1];})) +#endif /* __x86_64 */ + +/* SSE4 128-bit Packed Integer Comparisons. 
*/ +static __inline__ int __DEFAULT_FN_ATTRS +_mm_testz_si128(__m128i __M, __m128i __V) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); +#endif +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_testc_si128(__m128i __M, __m128i __V) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); +#endif +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm_testnzc_si128(__m128i __M, __m128i __V) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); +#endif +} + +#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V))) +#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) +#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V)) + +/* SSE4 64-bit Packed Integer Comparisons. */ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cmpeq_epi64(__m128i __V1, __m128i __V2) +{ + return (__m128i)((__v2di)__V1 == (__v2di)__V2); +} + +/* SSE4 Packed Integer Sign-Extension. */ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi8_epi16(__m128i __V) +{ + /* This function always performs a signed extension, but __v16qi is a char + which may be signed or unsigned, so use __v16qs. */ + return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi8_epi32(__m128i __V) +{ + /* This function always performs a signed extension, but __v16qi is a char + which may be signed or unsigned, so use __v16qs. */ + return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi8_epi64(__m128i __V) +{ + /* This function always performs a signed extension, but __v16qi is a char + which may be signed or unsigned, so use __v16qs. 
*/ + typedef signed char __v16qs __attribute__((__vector_size__(16))); + return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi16_epi32(__m128i __V) +{ + return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi16_epi64(__m128i __V) +{ + return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi32_epi64(__m128i __V) +{ + return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di); +} + +/* SSE4 Packed Integer Zero-Extension. */ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepu8_epi16(__m128i __V) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepu8_epi32(__m128i __V) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepu8_epi64(__m128i __V) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepu16_epi32(__m128i __V) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepu16_epi64(__m128i __V) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V); +#endif +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepu32_epi64(__m128i __V) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V); +#endif +} + +/* SSE4 Pack with Unsigned 
Saturation. */ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_packus_epi32(__m128i __V1, __m128i __V2) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2); +#endif +} + +/* SSE4 Multiple Packed Sums of Absolute Difference. */ +#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \ + (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \ + (__v16qi)(__m128i)(Y), (M)); }) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_minpos_epu16(__m128i __V) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V); +#endif +} + +/* Handle the sse4.2 definitions here. */ + +/* These definitions are normally in nmmintrin.h, but gcc puts them in here + so we'll do the same. */ + +#undef __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) + +/* These specify the type of data that we're comparing. */ +#define _SIDD_UBYTE_OPS 0x00 +#define _SIDD_UWORD_OPS 0x01 +#define _SIDD_SBYTE_OPS 0x02 +#define _SIDD_SWORD_OPS 0x03 + +/* These specify the type of comparison operation. */ +#define _SIDD_CMP_EQUAL_ANY 0x00 +#define _SIDD_CMP_RANGES 0x04 +#define _SIDD_CMP_EQUAL_EACH 0x08 +#define _SIDD_CMP_EQUAL_ORDERED 0x0c + +/* These macros specify the polarity of the operation. */ +#define _SIDD_POSITIVE_POLARITY 0x00 +#define _SIDD_NEGATIVE_POLARITY 0x10 +#define _SIDD_MASKED_POSITIVE_POLARITY 0x20 +#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30 + +/* These macros are used in _mm_cmpXstri() to specify the return. */ +#define _SIDD_LEAST_SIGNIFICANT 0x00 +#define _SIDD_MOST_SIGNIFICANT 0x40 + +/* These macros are used in _mm_cmpXstri() to specify the return. */ +#define _SIDD_BIT_MASK 0x00 +#define _SIDD_UNIT_MASK 0x40 + +/* SSE4.2 Packed Comparison Intrinsics. 
*/ +#define _mm_cmpistrm(A, B, M) \ + (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(M)) +#define _mm_cmpistri(A, B, M) \ + (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(M)) + +#define _mm_cmpestrm(A, LA, B, LB, M) \ + (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \ + (__v16qi)(__m128i)(B), (int)(LB), \ + (int)(M)) +#define _mm_cmpestri(A, LA, B, LB, M) \ + (int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \ + (__v16qi)(__m128i)(B), (int)(LB), \ + (int)(M)) + +/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */ +#define _mm_cmpistra(A, B, M) \ + (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(M)) +#define _mm_cmpistrc(A, B, M) \ + (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(M)) +#define _mm_cmpistro(A, B, M) \ + (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(M)) +#define _mm_cmpistrs(A, B, M) \ + (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(M)) +#define _mm_cmpistrz(A, B, M) \ + (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), (int)(M)) + +#define _mm_cmpestra(A, LA, B, LB, M) \ + (int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \ + (__v16qi)(__m128i)(B), (int)(LB), \ + (int)(M)) +#define _mm_cmpestrc(A, LA, B, LB, M) \ + (int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \ + (__v16qi)(__m128i)(B), (int)(LB), \ + (int)(M)) +#define _mm_cmpestro(A, LA, B, LB, M) \ + (int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \ + (__v16qi)(__m128i)(B), (int)(LB), \ + (int)(M)) +#define _mm_cmpestrs(A, LA, B, LB, M) \ + (int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \ + (__v16qi)(__m128i)(B), (int)(LB), \ + (int)(M)) +#define _mm_cmpestrz(A, LA, B, LB, M) \ + 
(int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \ + (__v16qi)(__m128i)(B), (int)(LB), \ + (int)(M)) + +/* SSE4.2 Compare Packed Data -- Greater Than. */ +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cmpgt_epi64(__m128i __V1, __m128i __V2) +{ + return (__m128i)((__v2di)__V1 > (__v2di)__V2); +} + +/* SSE4.2 Accumulate CRC32. */ +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_crc32_u8(unsigned int __C, unsigned char __D) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return __builtin_ia32_crc32qi(__C, __D); +#endif +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_crc32_u16(unsigned int __C, unsigned short __D) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return __builtin_ia32_crc32hi(__C, __D); +#endif +} + +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_crc32_u32(unsigned int __C, unsigned int __D) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return __builtin_ia32_crc32si(__C, __D); +#endif +} + +#ifdef __x86_64__ +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +_mm_crc32_u64(unsigned long long __C, unsigned long long __D) +{ +#ifdef __EMSCRIPTEN__ + // +#else + return __builtin_ia32_crc32di(__C, __D); +#endif +} +#endif /* __x86_64__ */ + +#undef __DEFAULT_FN_ATTRS + +#ifdef __POPCNT__ +#include +#endif + +#endif /* _SMMINTRIN_H */ diff --git a/system/include/emscripten/x86intrin.h b/system/include/emscripten/x86intrin.h index 3c0df78fc7041..bf18cf56dea6e 100644 --- a/system/include/emscripten/x86intrin.h +++ b/system/include/emscripten/x86intrin.h @@ -21,4 +21,8 @@ #include #endif +#if __SSE4_1__ +#include +#endif + #endif diff --git a/tests/test_core.py b/tests/test_core.py index 3fe190b24a645..d8cb2b549bfe0 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -5869,6 +5869,20 @@ def test_ssse3_full(self): self.emcc_args = orig_args + mode + ['-I' + path_from_root('tests'), '-mssse3'] + args self.do_run(open(path_from_root('tests', 'test_ssse3_full.cpp'), 'r').read(), native_result) + @SIMD + def 
test_sse4_1_full(self): + args = [] + if '-O0' in self.emcc_args: args += ['-D_DEBUG=1'] + Popen([CLANG, path_from_root('tests', 'test_sse4_1_full.cpp'), '-o', 'test_sse4_1_full', '-D_CRT_SECURE_NO_WARNINGS=1', '-msse4.1'] + args + get_clang_native_args(), env=get_clang_native_env(), stdout=PIPE).communicate() + native_result, err = Popen('./test_sse4_1_full', stdout=PIPE).communicate() + native_result = native_result.replace('\r\n', '\n') # Windows line endings fix + + Settings.PRECISE_F32 = 1 # SIMD currently requires Math.fround + orig_args = self.emcc_args + for mode in [[], ['-s', 'SIMD=1']]: + self.emcc_args = orig_args + mode + ['-I' + path_from_root('tests'), '-msse4.1'] + args + self.do_run(open(path_from_root('tests', 'test_sse4_1_full.cpp'), 'r').read(), native_result) + @SIMD def test_simd(self): test_path = path_from_root('tests', 'core', 'test_simd') diff --git a/tests/test_sse4_1_full.cpp b/tests/test_sse4_1_full.cpp new file mode 100644 index 0000000000000..74c85814a84c4 --- /dev/null +++ b/tests/test_sse4_1_full.cpp @@ -0,0 +1,104 @@ +// This file uses SSE4.1 by calling different functions with different interesting inputs and prints the results. +// Use a diff tool to compare the results between platforms. 
+ +#include +#define ENABLE_SSE2 +#include "test_sse_full.h" + +float *interesting_floats = get_interesting_floats(); +int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]); +uint32_t *interesting_ints = get_interesting_ints(); +int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]); +double *interesting_doubles = get_interesting_doubles(); +int numInterestingDoubles = sizeof(interesting_doubles_)/sizeof(interesting_doubles_[0]); + +int main() +{ + assert(numInterestingFloats % 4 == 0); + assert(numInterestingInts % 4 == 0); + assert(numInterestingDoubles % 4 == 0); + + Ret_M128i_M128i_Tint(__m128i, _mm_blend_epi16); + Ret_M128d_M128d_Tint(__m128d, _mm_blend_pd); + Ret_M128_M128_Tint(__m128, _mm_blend_ps); + // _mm_blendv_epi8 + // _mm_blendv_pd + // _mm_blendv_ps + // _mm_ceil_pd + // _mm_ceil_ps + // _mm_ceil_sd + // _mm_ceil_ss + // M128i_M128i_M128i(_mm_cmpeq_epi64); + // Ret_M128i(__m128i, _mm_cvtepi16_epi32); + // Ret_M128i(__m128i, _mm_cvtepi16_epi64); + // Ret_M128i(__m128i, _mm_cvtepi32_epi64); + // Ret_M128i(__m128i, _mm_cvtepi8_epi16); + // Ret_M128i(__m128i, _mm_cvtepi8_epi32); + // Ret_M128i(__m128i, _mm_cvtepi8_epi64); + // _mm_cvtepu16_epi32 + // _mm_cvtepu16_epi64 + // _mm_cvtepu32_epi64 + // _mm_cvtepu8_epi16 + // _mm_cvtepu8_epi32 + // _mm_cvtepu8_epi64 + // _mm_dp_pd + // _mm_dp_ps + Ret_M128i_Tint(int, _mm_extract_epi32); + Ret_M128i_Tint(int, _mm_extract_epi8); + // Ret_M128i_Tint(long long, _mm_extract_epi64); + Ret_M128i_Tint(int, _mm_extract_epi8); + Ret_M128_Tint(float, _mm_extract_ps); + // _mm_floor_pd + // _mm_floor_ps + // _mm_floor_sd + // _mm_floor_ss + // _mm_insert_epi32 + // _mm_insert_epi64 + // _mm_insert_epi8 + // _mm_insert_ps + M128i_M128i_M128i(_mm_max_epi32); + M128i_M128i_M128i(_mm_max_epi8); + M128i_M128i_M128i(_mm_max_epu16); + M128i_M128i_M128i(_mm_max_epu32); + M128i_M128i_M128i(_mm_min_epi32); + M128i_M128i_M128i(_mm_min_epi8); + M128i_M128i_M128i(_mm_min_epu16); 
+ M128i_M128i_M128i(_mm_min_epu32); + // _mm_minpos_epu16 + // _mm_mpsadbw_epu8 + // M128i_M128i_M128i(_mm_mul_epi32); + M128i_M128i_M128i(_mm_mullo_epi32); + // _mm_packus_epi32 + // _mm_round_pd + // _mm_round_ps + // _mm_round_sd + // _mm_round_ss + // _mm_stream_load_si128 + // _mm_test_all_ones + // _mm_test_all_zeros + // _mm_test_mix_ones_zeros + // _mm_testc_si128 + // _mm_testnzc_si128 + // _mm_testz_si128 + + // SSE 4.2: + // _mm_cmpestra + // _mm_cmpestrc + // _mm_cmpestri + // _mm_cmpestrm + // _mm_cmpestro + // _mm_cmpestrs + // _mm_cmpestrz + // M128i_M128i_M128i(_mm_cmpgt_epi64); + // _mm_cmpistra + // _mm_cmpistrc + // _mm_cmpistri + // _mm_cmpistrm + // _mm_cmpistro + // _mm_cmpistrs + // _mm_cmpistrz + // _mm_crc32_u16 + // _mm_crc32_u32 + // _mm_crc32_u64 + // _mm_crc32_u8 +} diff --git a/tests/test_sse_full.h b/tests/test_sse_full.h index f4de83aec273d..b6d7bcb9b516c 100644 --- a/tests/test_sse_full.h +++ b/tests/test_sse_full.h @@ -276,6 +276,17 @@ __m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ } +#define Ret_M128_Tint_body(Ret_type, func, Tint) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + Ret_type ret = func(m1, Tint); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s, %d) = %s\n", #func, str, Tint, str2); \ + } + #define Ret_M128i_Tint_body(Ret_type, func, Tint) \ for(int i = 0; i < numInterestingInts / 4; ++i) \ for(int k = 0; k < 4; ++k) \ @@ -368,6 +379,7 @@ __m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) F(Ret_type, func, 255); \ F(Ret_type, func, 309); +#define Ret_M128_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128_Tint_body, func) #define Ret_M128i_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128i_Tint_body, func) #define Ret_M128i_int_Tint(Ret_type, func) 
const_int8_unroll(Ret_type, Ret_M128i_int_Tint_body, func) #define Ret_M128i_M128i_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128i_M128i_Tint_body, func) From d0a002becc32a46bc268f5bf3b272ccd1ffb9488 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 14 Feb 2016 11:22:17 -0800 Subject: [PATCH 28/49] ignore all errors in has_hidden_attribute in file packager, to not break on unicode or other oddities --- tools/file_packager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/file_packager.py b/tools/file_packager.py index 30e085878d6ff..832ceef59b581 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -209,7 +209,7 @@ def has_hidden_attribute(filepath): attrs = ctypes.windll.kernel32.GetFileAttributesW(unicode(filepath)) assert attrs != -1 result = bool(attrs & 2) - except (AttributeError, AssertionError): + except: result = False return result From 8e427f68e348813fdecca2165a889010f2d920e6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 14 Feb 2016 11:18:08 -0800 Subject: [PATCH 29/49] add test for unicode in file packager --- tests/test_other.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_other.py b/tests/test_other.py index b8fecfe32c15e..cac670652250d 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -1,3 +1,5 @@ +# coding=utf-8 + import multiprocessing, os, pipes, re, shutil, subprocess, sys import glob import tools.shared @@ -2161,6 +2163,24 @@ def clean(txt): except ValueError: assert False + def test_file_packager_unicode(self): + unicode_name = 'unicode…☃' + if not os.path.exists(unicode_name): + try: + os.mkdir(unicode_name) + except: + print "we failed to even create a unicode dir, so on this OS, we can't test this" + return + full = os.path.join(unicode_name, 'data.txt') + open(full, 'w').write('data') + proc = Popen([PYTHON, FILE_PACKAGER, 'test.data', '--preload', full], stdout=PIPE, stderr=PIPE) + out, err = proc.communicate() + assert 
proc.returncode == 0, err + assert len(out) > 0, err + assert len(err) == 0, err + assert unicode_name in out, out + print len(err) + def test_crunch(self): try: print 'Crunch is located at ' + CRUNCH From 27ceda8a26807589b0e07aab697c30a3e915e08f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 16 Feb 2016 13:56:13 -0800 Subject: [PATCH 30/49] add test for fastcomp #137 --- tests/cases/rust_struct.ll | 39 +++++++++++++++++++++++++++++++++++++ tests/cases/rust_struct.txt | 1 + 2 files changed, 40 insertions(+) create mode 100644 tests/cases/rust_struct.ll create mode 100644 tests/cases/rust_struct.txt diff --git a/tests/cases/rust_struct.ll b/tests/cases/rust_struct.ll new file mode 100644 index 0000000000000..9b24b6c439820 --- /dev/null +++ b/tests/cases/rust_struct.ll @@ -0,0 +1,39 @@ +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +@.str = private unnamed_addr constant [9 x i8] c"*%d,%d*\0A\00" ; [#uses=1] + +define {i32, i32} @read_pair({i32, i32}* %ptr) norecurse nounwind readonly uwtable { +entry: + %value = load {i32, i32}, {i32, i32}* %ptr, align 4 + ret {i32, i32} %value +} + +; [#uses=0] +define i32 @main() { +entry: + %a = alloca {i32, i32}, align 4 + %a0 = getelementptr {i32, i32}, {i32, i32}* %a, i32 0, i32 0 + %a1 = getelementptr {i32, i32}, {i32, i32}* %a, i32 0, i32 1 + %b = alloca {i32, i32}, align 4 + %b0 = getelementptr {i32, i32}, {i32, i32}* %b, i32 0, i32 0 + %b1 = getelementptr {i32, i32}, {i32, i32}* %b, i32 0, i32 1 + + ; Initialize a with {1234, 5678} and b with {0, 0} + store i32 1234, i32* %a0, align 4 + store i32 5678, i32* %a1, align 4 + store i32 0, i32* %b0, align 4 + store i32 0, i32* %b1, align 4 + + ; This call should remain in the output. 
+ %v = call {i32, i32} @read_pair({i32, i32}* %a) + store {i32, i32} %v, {i32, i32}* %b, align 4 + + %b0v = load i32, i32* %b0, align 4 + %b1v = load i32, i32* %b1, align 4 + %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %b0v, i32 %b1v) ; [#uses=0 type=i32] + ret i32 1 +} + +; [#uses=1] +declare i32 @printf(i8*, ...) diff --git a/tests/cases/rust_struct.txt b/tests/cases/rust_struct.txt new file mode 100644 index 0000000000000..7aa5e93cf1248 --- /dev/null +++ b/tests/cases/rust_struct.txt @@ -0,0 +1 @@ +*1234,5678*` From 992d74a1907183d60a9e37e687af96c3b38f8f27 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 16 Feb 2016 18:05:53 -0800 Subject: [PATCH 31/49] fix test output --- tests/cases/rust_struct.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cases/rust_struct.txt b/tests/cases/rust_struct.txt index 7aa5e93cf1248..e05ddb157f22e 100644 --- a/tests/cases/rust_struct.txt +++ b/tests/cases/rust_struct.txt @@ -1 +1 @@ -*1234,5678*` +*1234,5678* From aeabdb27b7a1a5dd23c6d466283cea947a395000 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 17 Feb 2016 11:23:38 -0800 Subject: [PATCH 32/49] docs on link speed --- site/source/docs/getting_started/FAQ.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/site/source/docs/getting_started/FAQ.rst b/site/source/docs/getting_started/FAQ.rst index ce71499f2bbe1..3274aa521323f 100644 --- a/site/source/docs/getting_started/FAQ.rst +++ b/site/source/docs/getting_started/FAQ.rst @@ -64,6 +64,10 @@ The main tips for improving build time are: - When you have multiple bitcode files as inputs, put the largest file first (LLVM linking links the second and later ones into the first, so less copying is done on the first input to the linker). 
+- Having fewer bitcode files can be faster, so you might want to link files into larger files in parallel in your build system (you might already do this if you have logical libraries), and then the final command has fewer things to operate on. + +- You don't need to link into a single bitcode file yourself, you can call the final ``emcc`` command that emits JS with a list of files. ``emcc`` can then defer linking and avoid an intermediary step, if possible (this optimization is disabled by LTO and by `EMCC_DEBUG=2`). + Why does my code run slowly? ============================ From b321e756c5b45dbd25ed401c10663aed9d86d525 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Wed, 17 Feb 2016 13:34:35 -0800 Subject: [PATCH 33/49] Define the LDBL_* macros in terms of the corresponding compiler predefined macros. These macros have the same values when compiling with fastcomp. This change enables use of the LLVM wasm backend, which defines long double to be 128-bit. --- .../libc/musl/arch/emscripten/bits/float.h | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/system/lib/libc/musl/arch/emscripten/bits/float.h b/system/lib/libc/musl/arch/emscripten/bits/float.h index 89e9eb6efcfa5..53ec2d10876e6 100644 --- a/system/lib/libc/musl/arch/emscripten/bits/float.h +++ b/system/lib/libc/musl/arch/emscripten/bits/float.h @@ -1,17 +1,17 @@ #define FLT_ROUNDS 1 -#define FLT_EVAL_METHOD 0 +#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ -#define LDBL_TRUE_MIN 4.9406564584124654e-324 -#define LDBL_MIN 2.2250738585072014e-308 -#define LDBL_MAX 1.7976931348623157e+308 -#define LDBL_EPSILON 2.2204460492503131e-16 +#define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ +#define LDBL_MIN __LDBL_MIN__ +#define LDBL_MAX __LDBL_MAX__ +#define LDBL_EPSILON __LDBL_EPSILON__ -#define LDBL_MANT_DIG 53 -#define LDBL_MIN_EXP (-1021) -#define LDBL_MAX_EXP 1024 +#define LDBL_MANT_DIG __LDBL_MANT_DIG__ +#define LDBL_MIN_EXP __LDBL_MIN_EXP__ +#define LDBL_MAX_EXP __LDBL_MAX_EXP__ 
-#define LDBL_DIG 15 -#define LDBL_MIN_10_EXP (-307) -#define LDBL_MAX_10_EXP 308 +#define LDBL_DIG __LDBL_DIG__ +#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__ +#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__ -#define DECIMAL_DIG 17 +#define DECIMAL_DIG __DECIMAL_DIG__ From 2b7299f6c08f8c64104f2daa6a82c64173f81fdd Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Wed, 17 Feb 2016 14:45:19 -0800 Subject: [PATCH 34/49] Use "%zu" for printing size_t values. This makes the code work without warnings regardless of whether size_t is "unsigned" or "unsigned long". --- tools/gen_struct_info.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/gen_struct_info.py b/tools/gen_struct_info.py index e2b3b78fe274f..65a2153879d12 100644 --- a/tools/gen_struct_info.py +++ b/tools/gen_struct_info.py @@ -312,10 +312,10 @@ def gen_inspect_code(path, struct, code): c_descent(path[-1], code) if len(path) == 1: - c_set('__size__', 'i%u', 'sizeof (' + prefix + path[0] + ')', code) + c_set('__size__', 'i%zu', 'sizeof (' + prefix + path[0] + ')', code) else: - c_set('__size__', 'i%u', 'sizeof ((' + prefix + path[0] + ' *)0)->' + '.'.join(path[1:]), code) - #c_set('__offset__', 'i%u', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:]) + ')', code) + c_set('__size__', 'i%zu', 'sizeof ((' + prefix + path[0] + ' *)0)->' + '.'.join(path[1:]), code) + #c_set('__offset__', 'i%zu', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:]) + ')', code) for field in struct: if isinstance(field, dict): @@ -323,7 +323,7 @@ def gen_inspect_code(path, struct, code): fname = field.keys()[0] gen_inspect_code(path + [fname], field[fname], code) else: - c_set(field, 'i%u', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:] + [field]) + ')', code) + c_set(field, 'i%zu', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:] + [field]) + ')', code) c_ascent(code) From db6b79ad504f20669dda94132c95b7c4dfc0c56f Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: 
Thu, 18 Feb 2016 11:28:48 -0800 Subject: [PATCH 35/49] Fix parsing of SpiderMonkey errors for SIMD types. Spidermonkey recently started emitting two different error messages for unsupported SIMD types: asm.js type error: 'Int64x2' is not a standard SIMD type asm.js type error: 'Int8x16' is not a supported SIMD type Fix the error parser in the test runner to recognize both variants. --- tests/runner.py | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/tests/runner.py b/tests/runner.py index 7d676c2c0e9ac..f9353bac143f6 100755 --- a/tests/runner.py +++ b/tests/runner.py @@ -332,24 +332,23 @@ def build(self, src, dirname, filename, output_processor=None, main_file=None, a assert ('/* memory initializer */' not in src) or ('/* memory initializer */ allocate([]' in src) def validate_asmjs(self, err): - if "asm.js type error: 'Int8x16' is not a standard SIMD type" in err: - err = err.replace("asm.js type error: 'Int8x16' is not a standard SIMD type", "") - print >> sys.stderr, "\nWARNING: ignoring asm.js type error from Int8x16 due to implementation not yet available in SpiderMonkey. See https://bugzilla.mozilla.org/show_bug.cgi?id=1136226\n" - if "asm.js type error: 'Int16x8' is not a standard SIMD type" in err: - err = err.replace("asm.js type error: 'Int16x8' is not a standard SIMD type", "") - print >> sys.stderr, "\nWARNING: ignoring asm.js type error from Int16x8 due to implementation not yet available in SpiderMonkey. See https://bugzilla.mozilla.org/show_bug.cgi?id=1136226\n" - if "asm.js type error: 'Uint8x16' is not a standard SIMD type" in err: - err = err.replace("asm.js type error: 'Uint8x16' is not a standard SIMD type", "") - print >> sys.stderr, "\nWARNING: ignoring asm.js type error from Uint8x16 due to implementation not yet available in SpiderMonkey. 
See https://bugzilla.mozilla.org/show_bug.cgi?id=1244117\n" - if "asm.js type error: 'Uint16x8' is not a standard SIMD type" in err: - err = err.replace("asm.js type error: 'Uint16x8' is not a standard SIMD type", "") - print >> sys.stderr, "\nWARNING: ignoring asm.js type error from Uint16x8 due to implementation not yet available in SpiderMonkey. See https://bugzilla.mozilla.org/show_bug.cgi?id=1244117\n" - if "asm.js type error: 'Uint32x4' is not a standard SIMD type" in err: - err = err.replace("asm.js type error: 'Uint32x4' is not a standard SIMD type", "") - print >> sys.stderr, "\nWARNING: ignoring asm.js type error from Uint32x4 due to implementation not yet available in SpiderMonkey. See https://bugzilla.mozilla.org/show_bug.cgi?id=1240796\n" - if "asm.js type error: 'Float64x2' is not a standard SIMD type" in err: - err = err.replace("asm.js type error: 'Float64x2' is not a standard SIMD type", "") - print >> sys.stderr, "\nWARNING: ignoring asm.js type error from Float64x2 due to implementation not yet available in SpiderMonkey. See https://bugzilla.mozilla.org/show_bug.cgi?id=1124205\n" + m = re.search("asm.js type error: '(\w+)' is not a (standard|supported) SIMD type", err) + if m: + # Bug numbers for missing SIMD types: + bugs = { + 'Int8x16' : 1136226, + 'Int16x8' : 1136226, + 'Uint8x16' : 1244117, + 'Uint16x8' : 1244117, + 'Uint32x4' : 1240796, + 'Float64x2': 1124205, + } + simd = m.group(1) + if simd in bugs: + print >> sys.stderr, ("\nWARNING: ignoring asm.js type error from {} due to implementation not yet available in SpiderMonkey." 
+ + " See https://bugzilla.mozilla.org/show_bug.cgi?id={}\n").format(simd, bugs[simd]) + err = err.replace(m.group(0), '') + if 'uccessfully compiled asm.js code' in err and 'asm.js link error' not in err: print >> sys.stderr, "[was asm.js'ified]" elif 'asm.js' in err: # if no asm.js error, then not an odin build From 87ef7141c890653d8c575f2bc06d8d3d0daa2fb4 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 18 Feb 2016 12:21:48 -0800 Subject: [PATCH 36/49] update ccall regex to handle function names in the output #4111 --- src/preamble.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/preamble.js b/src/preamble.js index 0c0f943d14d74..0b4a6a41d0a4e 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -192,7 +192,7 @@ var cwrap, ccall; } #if NO_DYNAMIC_EXECUTION == 0 - var sourceRegex = /^function\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/; + var sourceRegex = /^function\s*[a-zA-Z]*\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/; function parseJSFunc(jsfunc) { // Match the body and the return value of a javascript function source var parsed = jsfunc.toString().match(sourceRegex).slice(1); From 3f8f00f4f5024056b14c205d3f9a9e3b46e644be Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 18 Feb 2016 12:47:07 -0800 Subject: [PATCH 37/49] Add myself to AUTHORS --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 424e0eff9661e..c17ac199be99e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -236,3 +236,4 @@ a license to everyone to use it as detailed in LICENSE.) * Arnab Choudhury (copyright owned by Tableau Software, Inc.) * Charles Vaughn (copyright owned by Tableau Software, Inc.) 
* Pierre Krieger +* Jakob Stoklund Olesen From db18594c918091fa9d76a7d88931fb1885907c10 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 18 Feb 2016 14:18:55 -0800 Subject: [PATCH 38/49] support -include-pch flag #4086 --- emcc.py | 2 +- tests/test_other.py | 55 +++++++++++++++++++++++++++------------------ 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/emcc.py b/emcc.py index 06d2cb6154e21..6d422c27a7ee3 100755 --- a/emcc.py +++ b/emcc.py @@ -771,7 +771,7 @@ def validate_arg_level(level_string, max_level, err_msg): if i > 0: prev = newargs[i-1] - if prev in ['-MT', '-MF', '-MQ', '-D', '-U', '-o', '-x', '-Xpreprocessor', '-include', '-imacros', '-idirafter', '-iprefix', '-iwithprefix', '-iwithprefixbefore', '-isysroot', '-imultilib', '-A', '-isystem', '-iquote', '-install_name', '-compatibility_version', '-current_version', '-I', '-L']: continue # ignore this gcc-style argument + if prev in ['-MT', '-MF', '-MQ', '-D', '-U', '-o', '-x', '-Xpreprocessor', '-include', '-imacros', '-idirafter', '-iprefix', '-iwithprefix', '-iwithprefixbefore', '-isysroot', '-imultilib', '-A', '-isystem', '-iquote', '-install_name', '-compatibility_version', '-current_version', '-I', '-L', '-include-pch']: continue # ignore this gcc-style argument if os.path.islink(arg) and os.path.realpath(arg).endswith(SOURCE_ENDINGS + BITCODE_ENDINGS + DYNAMICLIB_ENDINGS + ASSEMBLY_ENDINGS + HEADER_ENDINGS): arg = os.path.realpath(arg) diff --git a/tests/test_other.py b/tests/test_other.py index cac670652250d..c75d9c47ca7ea 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -2540,36 +2540,47 @@ def test_module_print(self): assert r'<{(123456789)}>' in output, output def test_precompiled_headers(self): - self.clear() + for suffix in ['gch', 'pch']: + print suffix + self.clear() - open('header.h', 'w').write('#define X 5\n') - Popen([PYTHON, EMCC, '-xc++-header', 'header.h', '-c']).communicate() - assert os.path.exists('header.h.gch') + open('header.h', 
'w').write('#define X 5\n') + Popen([PYTHON, EMCC, '-xc++-header', 'header.h', '-c']).communicate() + assert os.path.exists('header.h.gch') # default output is gch + if suffix != 'gch': + Popen([PYTHON, EMCC, '-xc++-header', 'header.h', '-o', 'header.h.' + suffix]).communicate() + assert open('header.h.gch').read() == open('header.h.' + suffix).read() - open('src.cpp', 'w').write(r''' + open('src.cpp', 'w').write(r''' #include int main() { printf("|%d|\n", X); return 0; } ''') - Popen([PYTHON, EMCC, 'src.cpp', '-include', 'header.h']).communicate() - - output = run_js(self.in_dir('a.out.js'), stderr=PIPE, full_output=True, engine=NODE_JS) - assert '|5|' in output, output - - # also verify that the gch is actually used - err = Popen([PYTHON, EMCC, 'src.cpp', '-include', 'header.h', '-Xclang', '-print-stats'], stderr=PIPE).communicate() - self.assertTextDataContained('*** PCH/Modules Loaded:\nModule: header.h.gch', err[1]) - # and sanity check it is not mentioned when not - try_delete('header.h.gch') - err = Popen([PYTHON, EMCC, 'src.cpp', '-include', 'header.h', '-Xclang', '-print-stats'], stderr=PIPE).communicate() - assert '*** PCH/Modules Loaded:\nModule: header.h.gch' not in err[1].replace('\r\n', '\n'), err[1] - - # with specified target via -o - try_delete('header.h.gch') - Popen([PYTHON, EMCC, '-xc++-header', 'header.h', '-o', 'my.gch']).communicate() - assert os.path.exists('my.gch') + Popen([PYTHON, EMCC, 'src.cpp', '-include', 'header.h']).communicate() + + output = run_js(self.in_dir('a.out.js'), stderr=PIPE, full_output=True, engine=NODE_JS) + assert '|5|' in output, output + + # also verify that the gch is actually used + err = Popen([PYTHON, EMCC, 'src.cpp', '-include', 'header.h', '-Xclang', '-print-stats'], stderr=PIPE).communicate() + self.assertTextDataContained('*** PCH/Modules Loaded:\nModule: header.h.' + suffix, err[1]) + # and sanity check it is not mentioned when not + try_delete('header.h.' 
+ suffix) + err = Popen([PYTHON, EMCC, 'src.cpp', '-include', 'header.h', '-Xclang', '-print-stats'], stderr=PIPE).communicate() + assert '*** PCH/Modules Loaded:\nModule: header.h.' + suffix not in err[1].replace('\r\n', '\n'), err[1] + + # with specified target via -o + try_delete('header.h.' + suffix) + Popen([PYTHON, EMCC, '-xc++-header', 'header.h', '-o', 'my.' + suffix]).communicate() + assert os.path.exists('my.' + suffix) + + # -include-pch flag + Popen([PYTHON, EMCC, '-xc++-header', 'header.h', '-o', 'header.h.' + suffix]).communicate() + check_execute([PYTHON, EMCC, 'src.cpp', '-include-pch', 'header.h.' + suffix]) + output = run_js('a.out.js') + assert '|5|' in output, output def test_warn_unaligned(self): open('src.cpp', 'w').write(r''' From 7da7f6366f08c5d886e7c41b3f860e0cec5fa00c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Fri, 19 Feb 2016 03:06:23 +0200 Subject: [PATCH 39/49] Fix codemods replacement for -s USE_PTHREADS=2 and -s PRECISE_F32=2 for the case when symbol name minification happens to generate a symbol that has a dollar sign in it. Fixes tests browser.test_pthread_setspecific_mainthread and browser.test_pthread_num_logical_cores. --- tools/client_mods.py | 133 +++++++++++++++++++++++++++---------------- 1 file changed, 85 insertions(+), 48 deletions(-) diff --git a/tools/client_mods.py b/tools/client_mods.py index 399de5e0a85bb..8e6dea5d5dd0e 100644 --- a/tools/client_mods.py +++ b/tools/client_mods.py @@ -23,6 +23,10 @@ def get(settings, minified): var m = /var ([^=]+)=global\.Math\.fround;/.exec(code); var minified = m[1]; if (!minified) throw 'fail'; + + // The minified JS variable for Math.fround might contain the '$' sign, so this must be escaped to \$ to be used as a search pattern. 
+ minified = minified.replace(/\$/g, "\\\\$$"); + do { var moar = false; // we need to re-do, as x(x( will not be fixed code = code.replace(new RegExp('[^a-zA-Z0-9\\\\$\\\\_]' + minified + '\\\\(', 'g'), function(s) { moar = true; return s[0] + '(' }); @@ -71,59 +75,92 @@ def get(settings, minified): var atomics_or = /var\s+([^=]+?)\s*=\s*global\.Atomics\.or;/.exec(code)[1]; var atomics_xor = /var\s+([^=]+?)\s*=\s*global\.Atomics\.xor;/.exec(code)[1]; + // JS variables may contain the '$' sign, so these must be escaped. However, + // the '$' sign needs to be escaped differently depending on whether it's on the + // string to search for side (espace by '\\'), or the value to replace + // with side (escape by '$'). + function escapeDollarForRegexSearch(str) { return str.replace(/\$/g, "\\\\$$"); } + function escapeDollarForRegexValue(str) { return str.replace(/\$/g, "$$$$"); } + + var wb = '([^\\\\w\\\\$])'; // word break (one character, which is backinserted) + + var s_heap8 = escapeDollarForRegexSearch(heap8); + var s_heap16 = escapeDollarForRegexSearch(heap16); + var s_heap32 = escapeDollarForRegexSearch(heap32); + var s_heapf32 = escapeDollarForRegexSearch(heapf32); + var s_heapf64 = escapeDollarForRegexSearch(heapf64); + // The Atomics built-ins take as first parameter the heap object, however when replacing those with // polyfill versions, it is not possible to pass a heap object as the first parameter. Therefore // route each call to Atomics to a polyfill function for each type, e.g. 
"Atomics_add(HEAP32, index, val)" -> "Atomics_add_32(index, val)" - code = code.replace(new RegExp('\\\\b' + atomics_load + '\\\\('+heap8+',', 'g'), atomics_load + "_8("); - code = code.replace(new RegExp('\\\\b' + atomics_load + '\\\\('+heap16+',', 'g'), atomics_load + "_16("); - code = code.replace(new RegExp('\\\\b' + atomics_load + '\\\\('+heap32+',', 'g'), atomics_load + "_32("); - code = code.replace(new RegExp('\\\\b' + atomics_load + '\\\\('+heapf32+',', 'g'), atomics_load + "_f32("); - code = code.replace(new RegExp('\\\\b' + atomics_load + '\\\\('+heapf64+',', 'g'), atomics_load + "_f64("); - - code = code.replace(new RegExp('\\\\b' + atomics_store + '\\\\('+heap8+',', 'g'), atomics_store + "_8("); - code = code.replace(new RegExp('\\\\b' + atomics_store + '\\\\('+heap16+',', 'g'), atomics_store + "_16("); - code = code.replace(new RegExp('\\\\b' + atomics_store + '\\\\('+heap32+',', 'g'), atomics_store + "_32("); - code = code.replace(new RegExp('\\\\b' + atomics_store + '\\\\('+heapf32+',', 'g'), atomics_store + "_f32("); - code = code.replace(new RegExp('\\\\b' + atomics_store + '\\\\('+heapf64+',', 'g'), atomics_store + "_f64("); - - code = code.replace(new RegExp('\\\\b' + atomics_add + '\\\\('+heap8+',', 'g'), atomics_add + "_8("); - code = code.replace(new RegExp('\\\\b' + atomics_add + '\\\\('+heap16+',', 'g'), atomics_add + "_16("); - code = code.replace(new RegExp('\\\\b' + atomics_add + '\\\\('+heap32+',', 'g'), atomics_add + "_32("); - - code = code.replace(new RegExp('\\\\b' + atomics_sub + '\\\\('+heap8+',', 'g'), atomics_sub + "_8("); - code = code.replace(new RegExp('\\\\b' + atomics_sub + '\\\\('+heap16+',', 'g'), atomics_sub + "_16("); - code = code.replace(new RegExp('\\\\b' + atomics_sub + '\\\\('+heap32+',', 'g'), atomics_sub + "_32("); - - code = code.replace(new RegExp('\\\\b' + atomics_and + '\\\\('+heap8+',', 'g'), atomics_and + "_8("); - code = code.replace(new RegExp('\\\\b' + atomics_and + '\\\\('+heap16+',', 'g'), atomics_and 
+ "_16("); - code = code.replace(new RegExp('\\\\b' + atomics_and + '\\\\('+heap32+',', 'g'), atomics_and + "_32("); - - code = code.replace(new RegExp('\\\\b' + atomics_or + '\\\\('+heap8+',', 'g'), atomics_or + "_8("); - code = code.replace(new RegExp('\\\\b' + atomics_or + '\\\\('+heap16+',', 'g'), atomics_or + "_16("); - code = code.replace(new RegExp('\\\\b' + atomics_or + '\\\\('+heap32+',', 'g'), atomics_or + "_32("); - - code = code.replace(new RegExp('\\\\b' + atomics_xor + '\\\\('+heap8+',', 'g'), atomics_xor + "_8("); - code = code.replace(new RegExp('\\\\b' + atomics_xor + '\\\\('+heap16+',', 'g'), atomics_xor + "_16("); - code = code.replace(new RegExp('\\\\b' + atomics_xor + '\\\\('+heap32+',', 'g'), atomics_xor + "_32("); - - code = code.replace(new RegExp('\\\\b' + atomics_exchange + '\\\\('+heap8+',', 'g'), atomics_exchange + "_8("); - code = code.replace(new RegExp('\\\\b' + atomics_exchange + '\\\\('+heap16+',', 'g'), atomics_exchange + "_16("); - code = code.replace(new RegExp('\\\\b' + atomics_exchange + '\\\\('+heap32+',', 'g'), atomics_exchange + "_32("); - - code = code.replace(new RegExp('\\\\b' + atomics_compareExchange + '\\\\('+heap8+',', 'g'), atomics_compareExchange + "_8("); - code = code.replace(new RegExp('\\\\b' + atomics_compareExchange + '\\\\('+heap16+',', 'g'), atomics_compareExchange + "_16("); - code = code.replace(new RegExp('\\\\b' + atomics_compareExchange + '\\\\('+heap32+',', 'g'), atomics_compareExchange + "_32("); + var s_atomics_load = escapeDollarForRegexSearch(atomics_load); + var v_atomics_load = escapeDollarForRegexValue(atomics_load); + code = code.replace(new RegExp(wb + s_atomics_load + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_load + "_8("); + code = code.replace(new RegExp(wb + s_atomics_load + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_load + "_16("); + code = code.replace(new RegExp(wb + s_atomics_load + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_load + "_32("); + code = code.replace(new RegExp(wb + 
s_atomics_load + '\\\\('+s_heapf32+',', 'g'), '$1' + v_atomics_load + "_f32("); + code = code.replace(new RegExp(wb + s_atomics_load + '\\\\('+s_heapf64+',', 'g'), '$1' + v_atomics_load + "_f64("); + + var s_atomics_store = escapeDollarForRegexSearch(atomics_store); + var v_atomics_store = escapeDollarForRegexValue(atomics_store); + code = code.replace(new RegExp(wb + s_atomics_store + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_store + "_8("); + code = code.replace(new RegExp(wb + s_atomics_store + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_store + "_16("); + code = code.replace(new RegExp(wb + s_atomics_store + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_store + "_32("); + code = code.replace(new RegExp(wb + s_atomics_store + '\\\\('+s_heapf32+',', 'g'), '$1' + v_atomics_store + "_f32("); + code = code.replace(new RegExp(wb + s_atomics_store + '\\\\('+s_heapf64+',', 'g'), '$1' + v_atomics_store + "_f64("); + + var s_atomics_add = escapeDollarForRegexSearch(atomics_add); + var v_atomics_add = escapeDollarForRegexValue(atomics_add); + code = code.replace(new RegExp(wb + s_atomics_add + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_add + "_8("); + code = code.replace(new RegExp(wb + s_atomics_add + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_add + "_16("); + code = code.replace(new RegExp(wb + s_atomics_add + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_add + "_32("); + + var s_atomics_sub = escapeDollarForRegexSearch(atomics_sub); + var v_atomics_sub = escapeDollarForRegexValue(atomics_sub); + code = code.replace(new RegExp(wb + s_atomics_sub + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_sub + "_8("); + code = code.replace(new RegExp(wb + s_atomics_sub + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_sub + "_16("); + code = code.replace(new RegExp(wb + s_atomics_sub + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_sub + "_32("); + + var s_atomics_and = escapeDollarForRegexSearch(atomics_and); + var v_atomics_and = escapeDollarForRegexValue(atomics_and); + code = 
code.replace(new RegExp(wb + s_atomics_and + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_and + "_8("); + code = code.replace(new RegExp(wb + s_atomics_and + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_and + "_16("); + code = code.replace(new RegExp(wb + s_atomics_and + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_and + "_32("); + + var s_atomics_or = escapeDollarForRegexSearch(atomics_or); + var v_atomics_or = escapeDollarForRegexValue(atomics_or); + code = code.replace(new RegExp(wb + s_atomics_or + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_or + "_8("); + code = code.replace(new RegExp(wb + s_atomics_or + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_or + "_16("); + code = code.replace(new RegExp(wb + s_atomics_or + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_or + "_32("); + + var s_atomics_xor = escapeDollarForRegexSearch(atomics_xor); + var v_atomics_xor = escapeDollarForRegexValue(atomics_xor); + code = code.replace(new RegExp(wb + s_atomics_xor + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_xor + "_8("); + code = code.replace(new RegExp(wb + s_atomics_xor + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_xor + "_16("); + code = code.replace(new RegExp(wb + s_atomics_xor + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_xor + "_32("); + + var s_atomics_exchange = escapeDollarForRegexSearch(atomics_exchange); + var v_atomics_exchange = escapeDollarForRegexValue(atomics_exchange); + code = code.replace(new RegExp(wb + s_atomics_exchange + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_exchange + "_8("); + code = code.replace(new RegExp(wb + s_atomics_exchange + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_exchange + "_16("); + code = code.replace(new RegExp(wb + s_atomics_exchange + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_exchange + "_32("); + + var s_atomics_compareExchange = escapeDollarForRegexSearch(atomics_compareExchange); + var v_atomics_compareExchange = escapeDollarForRegexValue(atomics_compareExchange); + code = code.replace(new RegExp(wb + 
s_atomics_compareExchange + '\\\\('+s_heap8+',', 'g'), '$1' + v_atomics_compareExchange + "_8("); + code = code.replace(new RegExp(wb + s_atomics_compareExchange + '\\\\('+s_heap16+',', 'g'), '$1' + v_atomics_compareExchange + "_16("); + code = code.replace(new RegExp(wb + s_atomics_compareExchange + '\\\\('+s_heap32+',', 'g'), '$1' + v_atomics_compareExchange + "_32("); // Remove the import statements of Atomics built-ins. - code = code.replace(new RegExp("var " + atomics_load + "\\\\s*=\\\\s*global\\.Atomics\\.load;"), ""); - code = code.replace(new RegExp("var " + atomics_store + "\\\\s*=\\\\s*global\\.Atomics\\.store;"), ""); - code = code.replace(new RegExp("var " + atomics_exchange + "\\\\s*=\\\\s*global\\.Atomics\\.exchange;"), ""); - code = code.replace(new RegExp("var " + atomics_compareExchange + "\\\\s*=\\\\s*global\\.Atomics\\.compareExchange;"), ""); - code = code.replace(new RegExp("var " + atomics_add + "\\\\s*=\\\\s*global\\.Atomics\\.add;"), ""); - code = code.replace(new RegExp("var " + atomics_sub + "\\\\s*=\\\\s*global\\.Atomics\\.sub;"), ""); - code = code.replace(new RegExp("var " + atomics_and + "\\\\s*=\\\\s*global\\.Atomics\\.and;"), ""); - code = code.replace(new RegExp("var " + atomics_or + "\\\\s*=\\\\s*global\\.Atomics\\.or;"), ""); - code = code.replace(new RegExp("var " + atomics_xor + "\\\\s*=\\\\s*global\\.Atomics\\.xor;"), ""); + code = code.replace(new RegExp("var " + s_atomics_load + "\\\\s*=\\\\s*global\\.Atomics\\.load;"), ""); + code = code.replace(new RegExp("var " + s_atomics_store + "\\\\s*=\\\\s*global\\.Atomics\\.store;"), ""); + code = code.replace(new RegExp("var " + s_atomics_exchange + "\\\\s*=\\\\s*global\\.Atomics\\.exchange;"), ""); + code = code.replace(new RegExp("var " + s_atomics_compareExchange + "\\\\s*=\\\\s*global\\.Atomics\\.compareExchange;"), ""); + code = code.replace(new RegExp("var " + s_atomics_add + "\\\\s*=\\\\s*global\\.Atomics\\.add;"), ""); + code = code.replace(new RegExp("var " + s_atomics_sub 
+ "\\\\s*=\\\\s*global\\.Atomics\\.sub;"), ""); + code = code.replace(new RegExp("var " + s_atomics_and + "\\\\s*=\\\\s*global\\.Atomics\\.and;"), ""); + code = code.replace(new RegExp("var " + s_atomics_or + "\\\\s*=\\\\s*global\\.Atomics\\.or;"), ""); + code = code.replace(new RegExp("var " + s_atomics_xor + "\\\\s*=\\\\s*global\\.Atomics\\.xor;"), ""); // Implement polyfill versions of Atomics intrinsics inside the asm.js scope. code = code.replace("// EMSCRIPTEN_START_FUNCS", "// EMSCRIPTEN_START_FUNCS\\n" From 4b8276948d0fa8c5c5b2c779aa82ca805da29914 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 18 Feb 2016 18:03:20 -0800 Subject: [PATCH 40/49] add more testing for sab fallback --- tests/test_browser.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_browser.py b/tests/test_browser.py index afa0ee6dab80d..f1d133de6455c 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -2743,6 +2743,14 @@ def test_memory_growth_during_startup(self): # pthreads tests + def prep_no_SAB(self): + open('html.html', 'w').write(open(path_from_root('src', 'shell_minimal.html')).read().replace('''''', ''' + + ''')) + # Test that the emscripten_ atomics api functions work. 
def test_pthread_atomics(self): self.btest(path_from_root('tests', 'pthread', 'test_pthread_atomics.cpp'), expected='0', args=['-O3', '-s', 'USE_PTHREADS=2', '--separate-asm', '-s', 'PTHREAD_POOL_SIZE=8'], timeout=120) # extra time on first test, to be sure to build all libraries @@ -2795,6 +2803,10 @@ def test_pthread_create(self): print str(opt) + ' ' + str(pthreads) self.btest(path_from_root('tests', 'pthread', 'test_pthread_create.cpp'), expected='0', args=opt + pthreads + ['-s', 'PTHREAD_POOL_SIZE=8'], timeout=30) + if 'USE_PTHREADS=2' in pthreads: + self.prep_no_SAB() + self.btest(path_from_root('tests', 'pthread', 'test_pthread_create.cpp'), expected='0', args=opt + pthreads + ['-s', 'PTHREAD_POOL_SIZE=8', '--shell-file', 'html.html'], timeout=30) + # Test that a pthread can spawn another pthread of its own. def test_pthread_create_pthread(self): self.btest(path_from_root('tests', 'pthread', 'test_pthread_create_pthread.cpp'), expected='1', args=['-O3', '-s', 'USE_PTHREADS=2', '--separate-asm', '-s', 'PTHREAD_POOL_SIZE=2', '-s', 'NO_EXIT_RUNTIME=1'], timeout=30) @@ -2870,10 +2882,16 @@ def test_pthread_iostream(self): def test_pthread_setspecific_mainthread(self): self.btest(path_from_root('tests', 'pthread', 'test_pthread_setspecific_mainthread.cpp'), expected='0', args=['-O3', '-s', 'USE_PTHREADS=2', '--separate-asm'], timeout=30) + self.prep_no_SAB() + self.btest(path_from_root('tests', 'pthread', 'test_pthread_setspecific_mainthread.cpp'), expected='0', args=['-O3', '-s', 'USE_PTHREADS=2', '--separate-asm', '--shell-file', 'html.html'], timeout=30) + # Test the -s PTHREAD_HINT_NUM_CORES=x command line variable. 
def test_pthread_num_logical_cores(self): self.btest(path_from_root('tests', 'pthread', 'test_pthread_num_logical_cores.cpp'), expected='0', args=['-O3', '-s', 'USE_PTHREADS=2', '--separate-asm', '-s', 'PTHREAD_HINT_NUM_CORES=2'], timeout=30) + self.prep_no_SAB() + self.btest(path_from_root('tests', 'pthread', 'test_pthread_num_logical_cores.cpp'), expected='0', args=['-O3', '-g', '-s', 'USE_PTHREADS=2', '--separate-asm', '-s', 'PTHREAD_HINT_NUM_CORES=2', '--shell-file', 'html.html'], timeout=30) + # Test that pthreads have access to filesystem. def test_pthread_file_io(self): self.btest(path_from_root('tests', 'pthread', 'test_pthread_file_io.cpp'), expected='0', args=['-O3', '-s', 'USE_PTHREADS=2', '--separate-asm', '-s', 'PTHREAD_POOL_SIZE=1'], timeout=30) From f3818800215974605e9d70741d7c1a5721ea5d83 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 19 Feb 2016 16:59:25 -0800 Subject: [PATCH 41/49] export runtime methods in benchmarks, as lua tests need files --- tests/test_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index c405d706550fa..38d1e55fb0732 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -111,7 +111,7 @@ def process(filename): '-O3', '-s', 'DOUBLE_MODE=0', '-s', 'PRECISE_I64_MATH=0', '--memory-init-file', '0', '--js-transform', 'python hardcode.py', '-s', 'TOTAL_MEMORY=128*1024*1024', - '-s', 'NO_EXIT_RUNTIME=1', '-s', 'EXPORTED_RUNTIME_METHODS=[]', + '-s', 'NO_EXIT_RUNTIME=1', #'--profiling', #'--closure', '1', '-o', final] + shared_args + emcc_args + self.extra_args, stdout=PIPE, stderr=PIPE, env=self.env).communicate() From 659ba7750ce1f33aaa6c12b7d544f65a277bfd8d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 19 Feb 2016 18:08:05 -0800 Subject: [PATCH 42/49] make ccall regex work on all possible minification outputs #4111 --- src/preamble.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/preamble.js 
b/src/preamble.js index 0b4a6a41d0a4e..fe8bb74435f85 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -192,7 +192,7 @@ var cwrap, ccall; } #if NO_DYNAMIC_EXECUTION == 0 - var sourceRegex = /^function\s*[a-zA-Z]*\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/; + var sourceRegex = /^function\s*[a-zA-Z$_0-9]*\s*\(([^)]*)\)\s*{\s*([^*]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/; function parseJSFunc(jsfunc) { // Match the body and the return value of a javascript function source var parsed = jsfunc.toString().match(sourceRegex).slice(1); From cf21ed24fa6fd1111efca2a660d98345b58aee88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sat, 20 Feb 2016 15:50:15 +0200 Subject: [PATCH 43/49] Fix race condition in emrun where the Emscripten page might execute before the JS page 'load' event is called, so window.addEventListener('load') might be too late to hook emrun handlers. Fixes browser.test_emrun on linux bot. --- src/emrun_postjs.js | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/emrun_postjs.js b/src/emrun_postjs.js index d34ff0b6ec2ec..3192491dd5b1a 100644 --- a/src/emrun_postjs.js +++ b/src/emrun_postjs.js @@ -44,14 +44,15 @@ if (typeof window === "object" && (typeof ENVIRONMENT_IS_PTHREAD === 'undefined' post('^pageload^'); } } - window.addEventListener('load', emrun_register_handlers); -} -// POSTs the given binary data represented as a (typed) array data back to the emrun-based web server. -// To use from C code, call e.g. 
EM_ASM_({emrun_file_dump("file.dat", HEAPU8.subarray($0, $0 + $1));}, my_data_pointer, my_data_pointer_byte_length); -function emrun_file_dump(filename, data) { - var http = new XMLHttpRequest(); - Module['print']('Dumping out file "' + filename + '" with ' + data.length + ' bytes of data.'); - http.open("POST", "stdio.html?file=" + filename, true); - http.send(data); // XXX this does not work in workers, for some odd reason (issue #2681) + // POSTs the given binary data represented as a (typed) array data back to the emrun-based web server. + // To use from C code, call e.g. EM_ASM_({emrun_file_dump("file.dat", HEAPU8.subarray($0, $0 + $1));}, my_data_pointer, my_data_pointer_byte_length); + function emrun_file_dump(filename, data) { + var http = new XMLHttpRequest(); + Module['print']('Dumping out file "' + filename + '" with ' + data.length + ' bytes of data.'); + http.open("POST", "stdio.html?file=" + filename, true); + http.send(data); // XXX this does not work in workers, for some odd reason (issue #2681) + } + + if (typeof Module !== 'undefined' && typeof document !== 'undefined') emrun_register_handlers(); } From 3e7c52c2d3fbbebfff3e3ef1388f6565bfffbea1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sat, 20 Feb 2016 10:42:01 -0800 Subject: [PATCH 44/49] fix browser.test_aniso error #4072 --- tests/aniso.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/aniso.c b/tests/aniso.c index b925149d3d369..1aa9e5f2095fc 100644 --- a/tests/aniso.c +++ b/tests/aniso.c @@ -219,6 +219,8 @@ int main(int argc, char *argv[]) SDL_Quit(); // check for asm compilation bug with aliased functions with different sigs + + glBegin( GL_TRIANGLE_STRIP ); void (*f)(int, int) = glVertex2i; if ((int)f % 16 == 4) f(5, 7); void (*g)(int, int) = glVertex3f; From a8fe57e0fc373fbef1b3fc0473afcc5aa28450f5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sat, 20 Feb 2016 10:47:06 -0800 Subject: [PATCH 45/49] add slack to test_glgears --- tests/test_browser.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/tests/test_browser.py b/tests/test_browser.py index f1d133de6455c..0146dca21f61a 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1349,7 +1349,7 @@ def test_chunked_synchronous_xhr(self): time.sleep(2) def test_glgears(self): - self.btest('hello_world_gles.c', reference='gears.png', reference_slack=2, + self.btest('hello_world_gles.c', reference='gears.png', reference_slack=3, args=['-DHAVE_BUILTIN_SINCOS'], outfile='something.html', message='You should see animating gears.') From 28a308b1a821598324042d95762c63ce658ebe32 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sat, 20 Feb 2016 11:14:38 -0800 Subject: [PATCH 46/49] fix race between main thread and worker reporting in browser.test_glgears_proxy --- tests/test_browser.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/test_browser.py b/tests/test_browser.py index 0146dca21f61a..c0d509291a5bb 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -570,18 +570,22 @@ def test_glgears_proxy(self): original = open('test.js').read() - def copy(to, js_mod): - open(to + '.html', 'w').write(open('test.html').read().replace('test.js', to + '.js')) + def copy(to, js_mod, html_mod = lambda x: x): + open(to + '.html', 'w').write(html_mod(open('test.html').read().replace('test.js', to + '.js'))) open(to + '.js', 'w').write(js_mod(open('test.js').read())) # run with noProxy, but make main thread fail - copy('two', lambda original: original.replace('function _main($argc,$argv) {', 'function _main($argc,$argv) { if (ENVIRONMENT_IS_WEB) { var xhr = new XMLHttpRequest(); xhr.open("GET", "http://localhost:8888/report_result?999");xhr.send(); }')) + copy('two', lambda original: original.replace('function _main($argc,$argv) {', 'function _main($argc,$argv) { if (ENVIRONMENT_IS_WEB) { var xhr = new XMLHttpRequest(); xhr.open("GET", "http://localhost:8888/report_result?999");xhr.send(); }'), + lambda original: 
original.replace('function doReftest() {', 'function doReftest() { return; ')) # don't reftest on main thread, it would race self.run_browser('two.html?noProxy', None, ['/report_result?999']) + copy('two', lambda original: original.replace('function _main($argc,$argv) {', 'function _main($argc,$argv) { if (ENVIRONMENT_IS_WEB) { var xhr = new XMLHttpRequest(); xhr.open("GET", "http://localhost:8888/report_result?999");xhr.send(); }')) self.run_browser('two.html', None, ['/report_result?0']) # this is still cool # run without noProxy, so proxy, but make worker fail - copy('three', lambda original: original.replace('function _main($argc,$argv) {', 'function _main($argc,$argv) { if (ENVIRONMENT_IS_WORKER) { var xhr = new XMLHttpRequest(); xhr.open("GET", "http://localhost:8888/report_result?999");xhr.send(); }')) + copy('three', lambda original: original.replace('function _main($argc,$argv) {', 'function _main($argc,$argv) { if (ENVIRONMENT_IS_WORKER) { var xhr = new XMLHttpRequest(); xhr.open("GET", "http://localhost:8888/report_result?999");xhr.send(); }'), + lambda original: original.replace('function doReftest() {', 'function doReftest() { return; ')) # don't reftest on main thread, it would race self.run_browser('three.html', None, ['/report_result?999']) + copy('three', lambda original: original.replace('function _main($argc,$argv) {', 'function _main($argc,$argv) { if (ENVIRONMENT_IS_WORKER) { var xhr = new XMLHttpRequest(); xhr.open("GET", "http://localhost:8888/report_result?999");xhr.send(); }')) self.run_browser('three.html?noProxy', None, ['/report_result?0']) # this is still cool def test_glgears_proxy_jstarget(self): From 59b39892c4ad44b602e797a126cab2e2e6d55d40 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sat, 20 Feb 2016 11:18:21 -0800 Subject: [PATCH 47/49] make browser reftests more deterministic #4072 --- tests/runner.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/runner.py b/tests/runner.py index 
f9353bac143f6..57094158d36b0 100755 --- a/tests/runner.py +++ b/tests/runner.py @@ -816,16 +816,15 @@ def reftest(self, expected): img.src = '%s'; }; Module['postRun'] = doReftest; - Module['preRun'].push(function() { - setTimeout(doReftest, 5000); // if run() throws an exception and postRun is not called, this will kick in - }); if (typeof WebGLClient !== 'undefined') { // trigger reftest from RAF as well, needed for workers where there is no pre|postRun on the main thread var realRAF = window.requestAnimationFrame; window.requestAnimationFrame = function(func) { - realRAF(func); - setTimeout(doReftest, 5000); + realRAF(function() { + func(); + realRAF(doReftest); + }); }; // trigger reftest from canvas render too, for workers not doing GL @@ -833,7 +832,7 @@ def reftest(self, expected): worker.onmessage = function(event) { realWOM(event); if (event.data.target === 'canvas' && event.data.op === 'render') { - setTimeout(doReftest, 5000); + realRAF(doReftest); } }; } From 445f12cf2b28d3b11c9874d8d63a24d57b9bf92e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 23 Feb 2016 20:40:04 -0800 Subject: [PATCH 48/49] make interactive.test_sdl_wm_togglefullscreen behavior match what the text says, keep the canvas painting yellow after leaving fullscreen --- tests/sdl_wm_togglefullscreen.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/sdl_wm_togglefullscreen.c b/tests/sdl_wm_togglefullscreen.c index c76ced76dc3f0..3758019a1be7a 100644 --- a/tests/sdl_wm_togglefullscreen.c +++ b/tests/sdl_wm_togglefullscreen.c @@ -12,6 +12,8 @@ int inFullscreen = 0; int wasFullscreen = 0; +int finished = 0; + void render() { int width, height, isfs; emscripten_get_canvas_size(&width, &height, &isfs); @@ -21,6 +23,9 @@ void render() { void mainloop() { render(); + + if (finished) return; + SDL_Event event; int isInFullscreen = EM_ASM_INT_V(return !!(document.fullscreenElement || document.mozFullScreenElement || document.webkitFullscreenElement || 
document.msFullscreenElement)); if (isInFullscreen && !wasFullscreen) { @@ -35,7 +40,7 @@ void mainloop() { REPORT_RESULT(); #endif wasFullscreen = isInFullscreen; - emscripten_cancel_main_loop(); + finished = 1; return; } @@ -55,7 +60,7 @@ void mainloop() { #ifdef REPORT_RESULT REPORT_RESULT(); #endif - emscripten_cancel_main_loop(); + finished = 1; return; } else { printf("Entering fullscreen...\n"); From 07b87426f898d6e9c677db291d9088c839197291 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 23 Feb 2016 20:41:48 -0800 Subject: [PATCH 49/49] 1.36.0 --- emscripten-version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emscripten-version.txt b/emscripten-version.txt index b6b783386b601..b9808b21d5a2e 100644 --- a/emscripten-version.txt +++ b/emscripten-version.txt @@ -1,2 +1,2 @@ -"1.35.23" +"1.36.0"