diff --git a/AUTHORS b/AUTHORS index 6c6819bd836ad..3dba6d6ddfa50 100644 --- a/AUTHORS +++ b/AUTHORS @@ -9,7 +9,7 @@ a license to everyone to use it as detailed in LICENSE.) * Max Shawabkeh * Sigmund Vik * Jeff Terrace -* Benoit Tremblay +* Benoit Tremblay * Andreas Bergmeier * Ben Schwartz * David Claughton diff --git a/ChangeLog b/ChangeLog index cb9f57dfbc8bb..e0781e5b14b57 100644 --- a/ChangeLog +++ b/ChangeLog @@ -10,9 +10,36 @@ Not all changes are documented here. In particular, new features, user-oriented Current trunk code ------------------ - To see a list of commits in the active development branch 'incoming', which have not yet been packaged in a release, see - - Emscripten: https://github.com/kripken/emscripten/compare/1.20.0...incoming - - Emscripten-LLVM: https://github.com/kripken/emscripten-fastcomp/compare/1.20.0...incoming - - Emscripten-Clang: https://github.com/kripken/emscripten-fastcomp-clang/compare/1.20.0...incoming + - Emscripten: https://github.com/kripken/emscripten/compare/1.21.1...incoming + - Emscripten-LLVM: https://github.com/kripken/emscripten-fastcomp/compare/1.21.1...incoming + - Emscripten-Clang: https://github.com/kripken/emscripten-fastcomp-clang/compare/1.21.1...incoming + +v1.21.1: 7/3/2014 +------------------ + - Fixed an issue where wrong python interpreter could get invoked on Windows when both native and cygwin python were installed. + - Updated musl from version 0.9.13 to version 1.0.3. + - Full list of changes: + - Emscripten: https://github.com/kripken/emscripten/compare/1.21.0...1.21.1 + - Emscripten-LLVM: no changes. + - Emscripten-Clang: no changes. + +v1.21.0: 7/2/2014 +------------------ + - Implemented new SDL 1.2 functions SDL_GetRGB, SDL_GetRGBA and SDL_putenv. + - Added support for /dev/random, /dev/urandom and C++11 std::random_device, which will use cryptographically secure random api if available. (#2447) + - Added support for CMake find_path() directive. + - Added support for std::unique_ptr in embind. 
+ - Improved Windows support for ffdb.py. + - Implemented the clip_rect structure for created SDL surfaces. + - Fixed a regression with SDL touch events (#2466) + - Added support for C++11 std::thread::hardware_concurrency which backs to navigator.hardwareConcurrency. See http://wiki.whatwg.org/wiki/Navigator_HW_Concurrency (#2456) + - Optimized embind code generation with constexprs. + - Enabled the use of Runtime.add&removeFunction when closure minification is active (#2446) + - Implemented support for accessing WebGL when building via the proxy to worker architecture. + - Full list of changes: + - Emscripten: https://github.com/kripken/emscripten/compare/1.20.0...1.21.0 + - Emscripten-LLVM: no changes. + - Emscripten-Clang: no changes. v1.20.0: 6/13/2014 ------------------ diff --git a/emcc b/emcc index e10d260d4c31e..57827f2e60ca6 100755 --- a/emcc +++ b/emcc @@ -156,11 +156,11 @@ Options that are modified or new in %s include: opt levels, see apply_opt_level() in tools/shared.py and also src/settings.js.) - -O2 As -O1, plus various js-level optimizations and - LLVM -O3 optimizations. This is the recommended - setting for a release build: slower compilation - time in return for the smallest and fastest - output. + -O2 As -O1, plus various js-level optimizations, LLVM + -O3 optimizations, and memory init file generation + (--memory-init-file 1). This is a good setting + for an optimized build: runs much faster than + -O1, and compiles much faster than -O3. -Os Like -O2 with extra optimizations for size. @@ -482,7 +482,8 @@ Options that are modified or new in %s include: --memory-init-file 0: Do not emit a separate memory initialization file, keep the static initialization inside - the generated JavaScript as text (default) + the generated JavaScript as text (default + in -O0 and -O1) 1: Emit a separate memory initialization file in binary format. 
This is more efficient than storing it as text inside JavaScript, but does @@ -495,7 +496,7 @@ Options that are modified or new in %s include: stuff has happened and it is safe to call library functions, as main() will only be called at that time. You can also call - addOnPreMain from a preRun.) + addOnPreMain from a preRun.) (default in -O2+) -Wno-warn-absolute-paths If not specified, the compiler will warn about any uses of absolute paths in -I and -L command line @@ -823,7 +824,7 @@ try: emrun = False jcache = False save_bc = False - memory_init_file = False + memory_init_file = None use_preload_cache = False no_heap_copy = False proxy_to_worker = False @@ -1075,6 +1076,7 @@ try: if js_opts is None: js_opts = opt_level >= 2 if llvm_opts is None: llvm_opts = LLVM_OPT_LEVEL[opt_level] if opt_level == 0: debug_level = 4 + if memory_init_file is None: memory_init_file = opt_level >= 2 if llvm_lto is None and bind: logging.debug('running lto for embind') # XXX this is a workaround for a pointer issue @@ -1357,7 +1359,7 @@ try: assert not shared.Settings.MAIN_MODULE if shared.Settings.MAIN_MODULE or shared.Settings.SIDE_MODULE: - assert not memory_init_file, 'memory init file is not supported with module linking' + memory_init_file = False # memory init file is not supported with module linking assert shared.Settings.ASM_JS, 'module linking requires asm.js output (-s ASM_JS=1)' shared.Settings.LINKABLE = 1 # TODO: add FORCE_DCE option for the brave people that do want to dce here and in side modules debug_level = max(debug_level, 2) @@ -1770,6 +1772,9 @@ try: js_optimizer_queue += ['simplifyExpressions'] + if shared.Settings.RELOOP and not shared.Settings.ASM_JS: + js_optimizer_queue += ['optimizeShiftsAggressive', get_eliminate()] # aggressive shifts optimization requires loops, it breaks on switches + # simplify ifs if it is ok to make the code somewhat unreadable, and unless outlining (simplified ifs # with commaified code breaks late aggressive variable 
elimination) if shared.Settings.SIMPLIFY_IFS and (debug_level == 0 or profiling) and shared.Settings.OUTLINING_LIMIT == 0: js_optimizer_queue += ['simplifyIfs'] diff --git a/emscripten-version.txt b/emscripten-version.txt index 3d4850e7671f3..1825e0c3ba167 100644 --- a/emscripten-version.txt +++ b/emscripten-version.txt @@ -1,2 +1,2 @@ -1.21.1 +1.21.2 diff --git a/emscripten.py b/emscripten.py index 98877a8ebc95c..8193ba52fdd70 100755 --- a/emscripten.py +++ b/emscripten.py @@ -930,7 +930,6 @@ def move_preasm(m): funcs_js += ['\n// EMSCRIPTEN_END_FUNCS\n'] - simple = os.environ.get('EMCC_SIMPLE_ASM') class Counter: i = 0 j = 0 @@ -1113,12 +1112,9 @@ def keyfunc(other): exported_implemented_functions = list(exported_implemented_functions) + metadata['initializers'] exported_implemented_functions.append('runPostSets') exports = [] - if not simple: - for export in exported_implemented_functions + asm_runtime_funcs + function_tables: - exports.append("%s: %s" % (export, export)) - exports = '{ ' + ', '.join(exports) + ' }' - else: - exports = '_main' + for export in exported_implemented_functions + asm_runtime_funcs + function_tables: + exports.append("%s: %s" % (export, export)) + exports = '{ ' + ', '.join(exports) + ' }' # calculate globals try: del forwarded_json['Variables']['globals']['_llvm_global_ctors'] # not a true variable @@ -1145,10 +1141,17 @@ def math_fix(g): the_global = '{ ' + ', '.join(['"' + math_fix(s) + '": ' + s for s in fundamentals]) + ' }' sending = '{ ' + ', '.join(['"' + math_fix(s) + '": ' + s for s in basic_funcs + global_funcs + basic_vars + basic_float_vars + global_vars]) + ' }' # received - if not simple: - receiving = ';\n'.join(['var ' + s + ' = Module["' + s + '"] = asm["' + s + '"]' for s in exported_implemented_functions + function_tables]) - else: - receiving = 'var _main = Module["_main"] = asm;' + receiving = '' + if settings['ASSERTIONS']: + # assert on the runtime being in a valid state when calling into compiled code. 
The only exceptions are + # some support code like malloc TODO: verify that malloc is actually safe to use that way + receiving = '\n'.join(['var real_' + s + ' = asm["' + s + '"]; asm["' + s + '''"] = function() { + assert(runtimeInitialized, 'you need to wait for the runtime to be ready (e.g. wait for main() to be called)'); + assert(!runtimeExited, 'the runtime was exited (use NO_EXIT_RUNTIME to keep it alive after main() exits)'); + return real_''' + s + '''.apply(null, arguments); +}; +''' for s in exported_implemented_functions if s not in ['_malloc', '_free', '_memcpy', '_memset']]) + receiving += ';\n'.join(['var ' + s + ' = Module["' + s + '"] = asm["' + s + '"]' for s in exported_implemented_functions + function_tables]) # finalize diff --git a/src/library_sdl.js b/src/library_sdl.js index 2f41137b7c5c8..ec1ae61ff09a5 100644 --- a/src/library_sdl.js +++ b/src/library_sdl.js @@ -260,6 +260,16 @@ var LibrarySDL = { }; }, + checkPixelFormat: function(fmt) { +#if ASSERTIONS + // Canvas screens are always RGBA. + var format = {{{ makeGetValue('fmt', C_STRUCTS.SDL_PixelFormat.format, 'i32') }}}; + if (format != {{{ cDefine('SDL_PIXELFORMAT_RGBA8888') }}}) { + Runtime.warnOnce('Unsupported pixel format!'); + } +#endif + }, + // Load SDL color into a CSS-style color specification loadColorToCSSRGB: function(color) { var rgba = {{{ makeGetValue('color', '0', 'i32') }}}; @@ -1840,17 +1850,20 @@ var LibrarySDL = { }, SDL_MapRGB: function(fmt, r, g, b) { - // Canvas screens are always RGBA. We assume the machine is little-endian. + SDL.checkPixelFormat(fmt); + // We assume the machine is little-endian. return r&0xff|(g&0xff)<<8|(b&0xff)<<16|0xff000000; }, SDL_MapRGBA: function(fmt, r, g, b, a) { - // Canvas screens are always RGBA. We assume the machine is little-endian. + SDL.checkPixelFormat(fmt); + // We assume the machine is little-endian. 
return r&0xff|(g&0xff)<<8|(b&0xff)<<16|(a&0xff)<<24; }, SDL_GetRGB: function(pixel, fmt, r, g, b) { - // Canvas screens are always RGBA. We assume the machine is little-endian. + SDL.checkPixelFormat(fmt); + // We assume the machine is little-endian. if (r) { {{{ makeSetValue('r', '0', 'pixel&0xff', 'i8') }}}; } @@ -1863,7 +1876,8 @@ var LibrarySDL = { }, SDL_GetRGBA: function(pixel, fmt, r, g, b, a) { - // Canvas screens are always RGBA. We assume the machine is little-endian. + SDL.checkPixelFormat(fmt); + // We assume the machine is little-endian. if (r) { {{{ makeSetValue('r', '0', 'pixel&0xff', 'i8') }}}; } diff --git a/src/modules.js b/src/modules.js index 56f4c827a3e18..0c1d24bce6b4c 100644 --- a/src/modules.js +++ b/src/modules.js @@ -444,7 +444,7 @@ var LibraryManager = { } // apply synonyms. these are typically not speed-sensitive, and doing it this way makes it possible to not include hacks in the compiler - // (and makes it simpler to switch between SDL verisons, fastcomp and non-fastcomp, etc.). + // (and makes it simpler to switch between SDL versions, fastcomp and non-fastcomp, etc.). var lib = LibraryManager.library; libloop: for (var x in lib) { if (x.lastIndexOf('__') > 0) continue; // ignore __deps, __* @@ -453,17 +453,14 @@ var LibraryManager = { var target = x; while (typeof lib[target] === 'string') { if (lib[target].indexOf('(') >= 0) continue libloop; + if (lib[target].indexOf('Math_') == 0) continue libloop; target = lib[target]; } + if (lib[target + '__asm']) continue; // This is an alias of an asm library function. Also needs to be fully optimized. 
if (typeof lib[target] === 'undefined' || typeof lib[target] === 'function') { - if (target.indexOf('Math_') < 0) { - lib[x] = new Function('return _' + target + '.apply(null, arguments)'); - if (!lib[x + '__deps']) lib[x + '__deps'] = []; - lib[x + '__deps'].push(target); - } else { - lib[x] = new Function('return ' + target + '.apply(null, arguments)'); - } - continue; + lib[x] = new Function('return _' + target + '.apply(null, arguments)'); + if (!lib[x + '__deps']) lib[x + '__deps'] = []; + lib[x + '__deps'].push(target); } } } diff --git a/src/postamble.js b/src/postamble.js index cc1679098f548..85e3b320ba36f 100644 --- a/src/postamble.js +++ b/src/postamble.js @@ -127,6 +127,8 @@ function run(args) { if (Module['calledRun']) return; // run may have just been called while the async setStatus time below was happening Module['calledRun'] = true; + if (ABORT) return; + ensureInitRuntime(); preMain(); @@ -148,7 +150,7 @@ function run(args) { setTimeout(function() { Module['setStatus'](''); }, 1); - if (!ABORT) doRun(); + doRun(); }, 1); } else { doRun(); diff --git a/src/preamble.js b/src/preamble.js index fd8341702c4ac..953b552cc9734 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -383,9 +383,6 @@ var cwrap, ccall; // Note that string arguments will be stored on the stack (the JS string will become a C string on the stack). // @return The return value, as a native JS value (as in returnType) ccall = function ccallFunc(ident, returnType, argTypes, args) { -#if ASSERTIONS - assert(runtimeInitialized, 'you need to wait for the runtime to be ready (e.g. 
wait for main() to be called)'); -#endif var func = getCFunc(ident); var cArgs = []; #if ASSERTIONS @@ -441,9 +438,6 @@ var cwrap, ccall; // Creation of the arguments list (["$1","$2",...,"$nargs"]) var argNames = argTypes.map(function(x,i){return '$'+i}); var funcstr = "(function(" + argNames.join(',') + ") {"; -#if ASSERTIONS - funcstr += "assert(runtimeInitialized, 'you need to wait for the runtime to be ready (e.g. wait for main() to be called)');\n"; -#endif var nargs = argTypes.length; if (!numericArgs) { // Generate the code needed to convert the arguments from javascript @@ -1102,6 +1096,7 @@ var __ATEXIT__ = []; // functions called during shutdown var __ATPOSTRUN__ = []; // functions called after the runtime has exited var runtimeInitialized = false; +var runtimeExited = false; function preRun() { // compatibility - merge in anything from Module['preRun'] at this time @@ -1131,7 +1126,7 @@ function exitRuntime() { } #endif callRuntimeCallbacks(__ATEXIT__); - runtimeInitialized = false; + runtimeExited = true; } function postRun() { diff --git a/src/struct_info.json b/src/struct_info.json index 54c89fd7d29ad..655b6bc99575c 100644 --- a/src/struct_info.json +++ b/src/struct_info.json @@ -871,7 +871,7 @@ }, { "file": "SDL/SDL_pixels.h", - "defines": [], + "defines": ["SDL_PIXELFORMAT_RGBA8888"], "structs": { "SDL_Palette": [ "ncolors", diff --git a/system/lib/libc.symbols b/system/lib/libc.symbols index 28e959d9bfde0..90783d8d11457 100644 --- a/system/lib/libc.symbols +++ b/system/lib/libc.symbols @@ -55,6 +55,7 @@ T atol W bulk_free W calloc + T ffs W free T frexp T frexpf diff --git a/system/lib/libc/musl/src/misc/ffs.c b/system/lib/libc/musl/src/misc/ffs.c new file mode 100644 index 0000000000000..673ce5a975828 --- /dev/null +++ b/system/lib/libc/musl/src/misc/ffs.c @@ -0,0 +1,7 @@ +#include <strings.h> +#include "atomic.h" + +int ffs(int i) +{ + return i ?
a_ctz_l(i)+1 : 0; +} diff --git a/tests/runner.py b/tests/runner.py index e9479313fde4d..93c40e3f41e2d 100755 --- a/tests/runner.py +++ b/tests/runner.py @@ -240,15 +240,17 @@ def build(self, src, dirname, filename, output_processor=None, main_file=None, a if output_processor is not None: output_processor(open(filename + '.o.js').read()) - if self.emcc_args is not None and 'ASM_JS=1' in self.emcc_args: + if self.emcc_args is not None: if '--memory-init-file' in self.emcc_args: memory_init_file = int(self.emcc_args[self.emcc_args.index('--memory-init-file')+1]) else: - memory_init_file = 0 + memory_init_file = '-O2' in self.emcc_args or '-O3' in self.emcc_args + src = open(filename + '.o.js').read() if memory_init_file: - assert '/* memory initializer */' not in open(filename + '.o.js').read() + # side memory init file, or an empty one in the js + assert ('/* memory initializer */' not in src) or ('/* memory initializer */ allocate([]' in src) else: - assert 'memory initializer */' in open(filename + '.o.js').read() + assert 'memory initializer */' in src def validate_asmjs(self, err): if 'uccessfully compiled asm.js code' in err and 'asm.js link error' not in err: diff --git a/tests/runtime_misuse.cpp b/tests/runtime_misuse.cpp new file mode 100644 index 0000000000000..d74d05ea4db9e --- /dev/null +++ b/tests/runtime_misuse.cpp @@ -0,0 +1,27 @@ +#include <stdio.h> +#include <emscripten.h> + +extern "C" { + +int noted = 0; + +char* EMSCRIPTEN_KEEPALIVE note(int n) { + EM_ASM_({ Module.print([$0, $1]) }, n, noted); + noted += n; + EM_ASM_({ Module.print(['noted is now', $0]) }, noted); + return "silly-string"; +} + +void free(void*) { // free is valid to call even after the runtime closes, so useful as a hack here for this test + EM_ASM_({ Module.print(['reporting', $0]) }, noted); + int result = noted; + REPORT_RESULT(); +} + +} + +int main() { + EM_ASM( myJSCallback() ); // calls a global JS func + return 0; +} + diff --git a/tests/test_browser.py b/tests/test_browser.py index
df595142ee39b..3765739371456 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1722,46 +1722,77 @@ def test_mem_init(self): # otherwise, we just overwrite self.btest('mem_init.cpp', expected='3', args=['--pre-js', 'pre.js', '--post-js', 'post.js', '--memory-init-file', '1', '-s', 'ASSERTIONS=0']) - def test_mem_init_ccall(self): - open(os.path.join(self.get_dir(), 'post.js'), 'w').write(''' - function doCcall() { - ccall('note', 'string', ['number'], [2]); + def test_runtime_misuse(self): + post_prep = ''' + var expected_ok = false; + function doCcall(n) { + ccall('note', 'string', ['number'], [n]); } var wrapped = cwrap('note', 'string', ['number']); // returns a string to suppress cwrap optimization - function doCwrapCall() { - var str = wrapped(3); + function doCwrapCall(n) { + var str = wrapped(n); Module.print('got ' + str); assert(str === 'silly-string'); } - - var ok = true; + function doDirectCall(n) { + Module['_note'](n); + } + ''' + post_test = ''' + var ok = false; try { - doCcall(); - ok = false; // should fail and not reach here, runtime is not ready yet so ccall will abort + doCcall(1); + ok = true; // should fail and not reach here, runtime is not ready yet so ccall will abort } catch(e) { Module.print('expected fail 1'); + assert(e.toString().indexOf('assert') >= 0); // assertion, not something else ABORT = false; // hackish } - assert(ok); + assert(ok === expected_ok); - ok = true; + ok = false; try { - doCwrapCall(); - ok = false; // should fail and not reach here, runtime is not ready yet so cwrap call will abort + doCwrapCall(2); + ok = true; // should fail and not reach here, runtime is not ready yet so cwrap call will abort } catch(e) { Module.print('expected fail 2'); + assert(e.toString().indexOf('assert') >= 0); // assertion, not something else + ABORT = false; // hackish + } + assert(ok === expected_ok); + + ok = false; + try { + doDirectCall(3); + ok = true; // should fail and not reach here, runtime is not ready yet so 
any code execution + } catch(e) { + Module.print('expected fail 3'); + assert(e.toString().indexOf('assert') >= 0); // assertion, not something else ABORT = false; // hackish } - assert(ok); + assert(ok === expected_ok); + ''' + post_hook = r''' function myJSCallback() { // called from main, this is an ok time - doCcall(); - doCwrapCall(); + doCcall(100); + doCwrapCall(200); + doDirectCall(300); } - ''') - self.btest('mem_init.cpp', expected='3', args=['--post-js', 'post.js', '--memory-init-file', '1']) + setTimeout(Module['_free'], 1000); // free is valid to call even after the runtime closes + ''' + + print 'mem init, so async, call too early' + open(os.path.join(self.get_dir(), 'post.js'), 'w').write(post_prep + post_test + post_hook) + self.btest('runtime_misuse.cpp', expected='600', args=['--post-js', 'post.js', '--memory-init-file', '1']) + print 'sync startup, call too late' + open(os.path.join(self.get_dir(), 'post.js'), 'w').write(post_prep + 'Module.postRun.push(function() { ' + post_test + ' });' + post_hook); + self.btest('runtime_misuse.cpp', expected='600', args=['--post-js', 'post.js', '--memory-init-file', '0']) + print 'sync, runtime still alive, so all good' + open(os.path.join(self.get_dir(), 'post.js'), 'w').write(post_prep + 'expected_ok = true; Module.postRun.push(function() { ' + post_test + ' });' + post_hook); + self.btest('runtime_misuse.cpp', expected='606', args=['--post-js', 'post.js', '--memory-init-file', '0', '-s', 'NO_EXIT_RUNTIME=1']) def test_worker_api(self): Popen([PYTHON, EMCC, path_from_root('tests', 'worker_api_worker.cpp'), '-o', 'worker.js', '-s', 'BUILD_AS_WORKER=1', '-s', 'EXPORTED_FUNCTIONS=["_one"]']).communicate() @@ -1825,15 +1856,15 @@ def test_uuid(self): # First run tests in Node and/or SPIDERMONKEY using run_js. Use closure compiler so we can check that # require('crypto').randomBytes and window.crypto.getRandomValues doesn't get minified out. 
- Popen([PYTHON, EMCC, '-O2', '--closure', '1', path_from_root('tests', 'uuid', 'test.c'), '-o', path_from_root('tests', 'uuid', 'test.js')], stdout=PIPE, stderr=PIPE).communicate() + Popen([PYTHON, EMCC, '-O2', '--closure', '1', path_from_root('tests', 'uuid', 'test.c'), '-o', 'test.js'], stdout=PIPE, stderr=PIPE).communicate() - test_js_closure = open(path_from_root('tests', 'uuid', 'test.js')).read() + test_js_closure = open('test.js').read() # Check that test.js compiled with --closure 1 contains ").randomBytes" and "window.crypto.getRandomValues" assert ").randomBytes" in test_js_closure assert "window.crypto.getRandomValues" in test_js_closure - out = run_js(path_from_root('tests', 'uuid', 'test.js'), full_output=True) + out = run_js('test.js', full_output=True) print out # Tidy up files that might have been created by this test. diff --git a/tests/test_core.py b/tests/test_core.py index 20278d374a580..747f656c4b556 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -843,6 +843,7 @@ def test_math_hyperbolic(self): def test_math_lgamma(self): if self.emcc_args is None: return self.skip('requires emcc') if not self.is_emscripten_abi(): return self.skip('asmjs-unknown-emscripten needed for accurate math') + if os.environ.get('EMCC_FAST_COMPILER') == '0': return self.skip('fastcomp needed for proper handling of _signgam extern') test_path = path_from_root('tests', 'math', 'lgamma') src, output = (test_path + s for s in ('.in', '.out')) @@ -2833,11 +2834,8 @@ def can_dlfcn(self): self.skip('todo in fastcomp') return False - if self.emcc_args and '--memory-init-file' in self.emcc_args: - for i in range(len(self.emcc_args)): - if self.emcc_args[i] == '--memory-init-file': - self.emcc_args = self.emcc_args[:i] + self.emcc_args[i+2:] - break + if self.emcc_args: + self.emcc_args += ['--memory-init-file', '0'] if Settings.ASM_JS: Settings.DLOPEN_SUPPORT = 1 @@ -4036,7 +4034,7 @@ def process(filename): try_delete(mem_file) self.do_run(src, ('size: 7\ndata: 
100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\nok.\ntexte\n', 'size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\ntexte\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\nok.\n'), post_build=post, extra_emscripten_args=['-H', 'libc/fcntl.h']) - if self.emcc_args and '--memory-init-file' in self.emcc_args: + if self.emcc_args and '-O2' in self.emcc_args: assert os.path.exists(mem_file) def test_files_m(self): @@ -6815,7 +6813,7 @@ def setUp(self): asm2 = make_run("asm2", compiler=CLANG, emcc_args=["-O2"]) asm3 = make_run("asm3", compiler=CLANG, emcc_args=["-O3"]) asm2f = make_run("asm2f", compiler=CLANG, emcc_args=["-O2", "-s", "PRECISE_F32=1"]) -asm2g = make_run("asm2g", compiler=CLANG, emcc_args=["-O2", "-g", "-s", "ASSERTIONS=1", "--memory-init-file", "1", "-s", "SAFE_HEAP=1"]) +asm2g = make_run("asm2g", compiler=CLANG, emcc_args=["-O2", "-g", "-s", "ASSERTIONS=1", "-s", "SAFE_HEAP=1"]) # Legacy test modes - slow2 = make_run("slow2", compiler=CLANG, emcc_args=["-O2", "-s", "ASM_JS=0"], env={"EMCC_FAST_COMPILER": "0"}) diff --git a/tests/test_other.py b/tests/test_other.py index 9e3c59e81f7d3..c54f312fd2e13 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -208,7 +208,6 @@ def test_emcc(self): (['-O2', '-g2'], lambda generated: '// The Module object' not in generated, 'with -g2, no comments in shell code'), (['-O2', '-g3'], lambda generated: '// The Module object' in generated, 'with -g3, yes comments in shell code'), (['-O2', '-profiling'], lambda generated: '// The Module object' in generated or os.environ.get('EMCC_FAST_COMPILER') == '0', 'with -profiling, yes comments in shell code (in fastcomp)'), - ]: print params, text self.clear() @@ -278,6 +277,12 @@ def test_emcc(self): output = Popen([PYTHON, compiler, 
path_from_root('tests', 'hello_world' + suffix), '--js-transform', '%s t.py' % (PYTHON)], stdout=PIPE, stderr=PIPE).communicate() assert open('a.out.js').read() == 'transformed!', 'Transformed output must be as expected' + for opts in [0, 1, 2, 3]: + print 'mem init in', opts + self.clear() + output = Popen([PYTHON, compiler, path_from_root('tests', 'hello_world.c'), '-O' + str(opts)], stdout=PIPE, stderr=PIPE).communicate() + assert os.path.exists('a.out.js.mem') == (opts >= 2), 'mem file should exist in -O2+' + # TODO: Add in files test a clear example of using disablePermissions, and link to it from the wiki # TODO: test normal project linking, static and dynamic: get_library should not need to be told what to link! # TODO: deprecate llvm optimizations, dlmalloc, etc. in emscripten.py. @@ -1837,6 +1842,12 @@ def test_js_optimizer(self): for input, expected, passes in [ (path_from_root('tools', 'test-js-optimizer.js'), open(path_from_root('tools', 'test-js-optimizer-output.js')).read(), ['hoistMultiples', 'removeAssignsToUndefined', 'simplifyExpressions']), + (path_from_root('tools', 'test-js-optimizer-t2c.js'), open(path_from_root('tools', 'test-js-optimizer-t2c-output.js')).read(), + ['simplifyExpressions', 'optimizeShiftsConservative']), + (path_from_root('tools', 'test-js-optimizer-t2.js'), open(path_from_root('tools', 'test-js-optimizer-t2-output.js')).read(), + ['simplifyExpressions', 'optimizeShiftsAggressive']), + (path_from_root('tools', 'test-js-optimizer-t3.js'), open(path_from_root('tools', 'test-js-optimizer-t3-output.js')).read(), + ['optimizeShiftsAggressive']), (path_from_root('tools', 'test-js-optimizer-si.js'), open(path_from_root('tools', 'test-js-optimizer-si-output.js')).read(), ['simplifyIfs']), (path_from_root('tools', 'test-js-optimizer-regs.js'), open(path_from_root('tools', 'test-js-optimizer-regs-output.js')).read(), @@ -2182,23 +2193,23 @@ def test_module_exports_with_closure(self): # Run with ./runner.py 
other.test_module_exports_with_closure # First make sure test.js isn't present. - try_delete(path_from_root('tests', 'Module-exports', 'test.js')) - assert not os.path.exists(path_from_root('tests', 'Module-exports', 'test.js')) + self.clear() # compile with -O2 --closure 0 - Popen([PYTHON, EMCC, path_from_root('tests', 'Module-exports', 'test.c'), '-o', path_from_root('tests', 'Module-exports', 'test.js'), '-O2', '--closure', '0', '--pre-js', path_from_root('tests', 'Module-exports', 'setup.js'), '-s', 'EXPORTED_FUNCTIONS=["_bufferTest"]'], stdout=PIPE, stderr=PIPE).communicate() + Popen([PYTHON, EMCC, path_from_root('tests', 'Module-exports', 'test.c'), '-o', 'test.js', '-O2', '--closure', '0', '--pre-js', path_from_root('tests', 'Module-exports', 'setup.js'), '-s', 'EXPORTED_FUNCTIONS=["_bufferTest"]'], stdout=PIPE, stderr=PIPE).communicate() # Check that compilation was successful - assert os.path.exists(path_from_root('tests', 'Module-exports', 'test.js')) - test_js_closure_0 = open(path_from_root('tests', 'Module-exports', 'test.js')).read() + assert os.path.exists('test.js') + test_js_closure_0 = open('test.js').read() # Check that test.js compiled with --closure 0 contains "module['exports'] = Module;" assert ("module['exports'] = Module;" in test_js_closure_0) or ('module["exports"]=Module' in test_js_closure_0) # Check that main.js (which requires test.js) completes successfully when run in node.js # in order to check that the exports are indeed functioning correctly. + shutil.copyfile(path_from_root('tests', 'Module-exports', 'main.js'), 'main.js') if NODE_JS in JS_ENGINES: - self.assertContained('bufferTest finished', run_js(path_from_root('tests', 'Module-exports', 'main.js'), engine=NODE_JS)) + self.assertContained('bufferTest finished', run_js('main.js', engine=NODE_JS)) # Delete test.js again and check it's gone. 
try_delete(path_from_root('tests', 'Module-exports', 'test.js')) @@ -2218,7 +2229,7 @@ def test_module_exports_with_closure(self): # Check that main.js (which requires test.js) completes successfully when run in node.js # in order to check that the exports are indeed functioning correctly. if NODE_JS in JS_ENGINES: - self.assertContained('bufferTest finished', run_js(path_from_root('tests', 'Module-exports', 'main.js'), engine=NODE_JS)) + self.assertContained('bufferTest finished', run_js('main.js', engine=NODE_JS)) # Tidy up files that might have been created by this test. try_delete(path_from_root('tests', 'Module-exports', 'test.js')) diff --git a/tests/test_sanity.py b/tests/test_sanity.py index 3ebd49b6baf6a..5562ec4f9fb3d 100644 --- a/tests/test_sanity.py +++ b/tests/test_sanity.py @@ -94,12 +94,10 @@ def make_executable(name): self.assertContained('It contains our best guesses for the important paths, which are:', output) self.assertContained('LLVM_ROOT', output) self.assertContained('NODE_JS', output) - self.assertContained('PYTHON', output) if platform.system() is not 'Windows': # os.chmod can't make files executable on Windows self.assertIdentical(temp_bin, re.search("^ *LLVM_ROOT *= (.*)$", output, re.M).group(1)) self.assertIdentical(os.path.join(temp_bin, 'node'), re.search("^ *NODE_JS *= (.*)$", output, re.M).group(1)) - self.assertIdentical(os.path.join(temp_bin, 'python2'), re.search("^ *PYTHON *= (.*)$", output, re.M).group(1)) self.assertContained('Please edit the file if any of those are incorrect', output) self.assertContained('This command will now exit. 
When you are done editing those paths, re-run it.', output) assert output.split()[-1].endswith('===='), 'We should have stopped: ' + output @@ -454,14 +452,14 @@ def test_emcc_caching(self): assert os.stat(dcebc_name).st_size < os.stat(basebc_name).st_size/2, 'Dead code elimination must remove most of libc++' # should only have metadata in -O0, not 1 and 2 if i > 0: + ll = None for ll_name in ll_names: - ll = None - try: - ll = open(ll_name).read() + if os.path.exists(ll_name): + check_call([LLVM_DIS, ll_name, '-o', ll_name + '.ll']) + ll = open(ll_name + '.ll').read() break - except: - pass assert ll + print 'metas:', ll.count('\n!') assert ll.count('\n!') < 25 # a few lines are left even in -O1 and -O2 finally: del os.environ['EMCC_DEBUG'] diff --git a/tools/js-optimizer.js b/tools/js-optimizer.js index d3121bc8bb9ca..0c689d04471a2 100644 --- a/tools/js-optimizer.js +++ b/tools/js-optimizer.js @@ -975,6 +975,397 @@ function simplifyIfs(ast) { }); } +// In typed arrays mode 2, we can have +// HEAP[x >> 2] +// very often. We can in some cases do the shift on the variable itself when it is set, +// to greatly reduce the number of shift operations. +function optimizeShiftsInternal(ast, conservative) { + assert(!asm); + var MAX_SHIFTS = 3; + traverseGeneratedFunctions(ast, function(fun) { + var funMore = true; + var funFinished = {}; + while (funMore) { + funMore = false; + // Recognize variables and parameters + var vars = {}; + function newVar(name, param, addUse) { + if (!vars[name]) { + vars[name] = { + param: param, + defs: addUse ? 1 : 0, + uses: 0, + timesShifted: [0, 0, 0, 0], // zero shifts of size 0, 1, 2, 3 + benefit: 0, + primaryShift: -1 + }; + } + } + // params + if (fun[2]) { + fun[2].forEach(function(arg) { + newVar(arg, true, true); + }); + } + // vars + // XXX if var has >>=, ignore it here? 
That means a previous pass already optimized it + var hasSwitch = traverse(fun, function(node, type) { + if (type === 'var') { + node[1].forEach(function(arg) { + newVar(arg[0], false, arg[1]); + }); + } else if (type === 'switch') { + // The relooper can't always optimize functions, and we currently don't work with + // switch statements when optimizing shifts. Bail. + return true; + } + }); + if (hasSwitch) { + break; + } + // uses and defs TODO: weight uses by being inside a loop (powers). without that, we + // optimize for code size, not speed. + var stack = []; + traverse(fun, function(node, type) { + stack.push(node); + if (type === 'name' && vars[node[1]] && stack[stack.length-2][0] != 'assign') { + vars[node[1]].uses++; + } else if (type === 'assign' && node[2][0] === 'name' && vars[node[2][1]]) { + vars[node[2][1]].defs++; + } + }); + // First, break up elements inside a shift. This lets us see clearly what to do next. + traverse(fun, function(node, type) { + if (type === 'binary' && node[1] === '>>' && node[3][0] === 'num') { + var shifts = node[3][1]; + if (shifts <= MAX_SHIFTS) { + // Check for validity. It is ok to have a single element that might have non-zeroed lower bits, but no more. + // x + 4 >> 2 === (x >> 2) + (4 >> 2), but not x + 3 >> 2 === (x >> 2) + (3 >> 2) (c.f. 
x=1, we get 1 !== 0) + var seen = '', ok = true; + function checkShift(subNode) { + if (subNode[0] === 'binary') { + switch (subNode[1]) { + case '+': case '|': case '&': { // this could be more comprehensive, but likely not needed + checkShift(subNode[2]); + checkShift(subNode[3]); + break; + } + case '>>': case '>>>': { + checkShift(subNode[2]); + break; + } + case '<<': { + if (subNode[3][0] === 'num' && subNode[3][1] >= shifts) break; // bits are clear, all good + checkShift(subNode[2]); + break; + } + case '*': { + if (subNode[3][0] === 'num') { + var value = subNode[3][1]; + if (((value >> shifts) << shifts) === value) return; // bits are clear, all good + } + checkShift(subNode[2]); + checkShift(subNode[3]); + break; + } + default: ok = false; + } + return; + } + if (subNode[0] === 'name') { + var name = subNode[1]; + if (!seen) { + seen = name; + } else if (name !== seen) { + ok = false; + } + return; + } + if (subNode[0] === 'num') { + var value = subNode[1]; + if (((value >> shifts) << shifts) !== value) ok = false; + return; + } + if (subNode[0] === 'sub') { + if (seen) ok = false; + seen = 'heap access'; + return; + } + ok = false; // anything else is bad + } + checkShift(node[2]); + if (!ok) return; + + // Push the >> inside the value elements + function addShift(subNode) { + if (subNode[0] === 'binary' && subNode[1] === '+') { + subNode[2] = addShift(subNode[2]); + subNode[3] = addShift(subNode[3]); + return subNode; + } + if (subNode[0] === 'name' && !subNode[2]) { // names are returned with a shift, but we also note their being shifted + var name = subNode[1]; + if (vars[name]) { + vars[name].timesShifted[shifts]++; + subNode[2] = true; + } + } + return ['binary', '>>', subNode, ['num', shifts]]; + } + return addShift(node[2]); + } + } + }); + traverse(fun, function(node, type) { + if (node[0] === 'name' && node[2]) { + return node.slice(0, 2); // clean up our notes + } + }); + // At this point, shifted expressions are split up, and we know who the 
vars are and their info, so we can decide + // TODO: vars that depend on other vars + for (var name in vars) { + var data = vars[name]; + var totalTimesShifted = sum(data.timesShifted); + if (totalTimesShifted === 0) { + continue; + } + if (totalTimesShifted != Math.max.apply(null, data.timesShifted)) { + // TODO: Handle multiple different shifts + continue; + } + if (funFinished[name]) continue; + // We have one shift size (and possible unshifted uses). Consider replacing this variable with a shifted clone. If + // the estimated benefit is >0, we will do it + if (data.defs === 1) { + data.benefit = totalTimesShifted - 2*(data.defs + (data.param ? 1 : 0)); + } + if (conservative) data.benefit = 0; + if (data.benefit > 0) { + funMore = true; // We will reprocess this function + for (var i = 0; i < 4; i++) { + if (data.timesShifted[i]) { + data.primaryShift = i; + } + } + } + } + //printErr(JSON.stringify(vars)); + function cleanNotes() { // We need to mark 'name' nodes as 'processed' in some passes here; this cleans the notes up + traverse(fun, function(node, type) { + if (node[0] === 'name' && node[2]) { + return node.slice(0, 2); + } + }); + } + cleanNotes(); + // Apply changes + function needsShift(name) { + return vars[name] && vars[name].primaryShift >= 0; + } + for (var name in vars) { // add shifts for params and var's for all new variables + var data = vars[name]; + if (needsShift(name)) { + if (data.param) { + fun[3].unshift(['var', [[name + '$s' + data.primaryShift, ['binary', '>>', ['name', name], ['num', data.primaryShift]]]]]); + } else { + fun[3].unshift(['var', [[name + '$s' + data.primaryShift]]]); + } + } + } + var stack = []; + traverse(fun, function(node, type) { // add shift to assignments + stack.push(node); + if (node[0] === 'assign' && node[1] === true && node[2][0] === 'name' && needsShift(node[2][1]) && !node[2][2]) { + var name = node[2][1]; + var data = vars[name]; + var parent = stack[stack.length-3]; + var statements = 
getStatements(parent); + assert(statements, 'Invalid parent for assign-shift: ' + dump(parent)); + var i = statements.indexOf(stack[stack.length-2]); + statements.splice(i+1, 0, ['stat', ['assign', true, ['name', name + '$s' + data.primaryShift], ['binary', '>>', ['name', name, true], ['num', data.primaryShift]]]]); + } else if (node[0] === 'var') { + var args = node[1]; + for (var i = 0; i < args.length; i++) { + var arg = args[i]; + var name = arg[0]; + var data = vars[name]; + if (arg[1] && needsShift(name)) { + args.splice(i+1, 0, [name + '$s' + data.primaryShift, ['binary', '>>', ['name', name, true], ['num', data.primaryShift]]]); + } + } + return node; + } + }); + cleanNotes(); + var stack = []; + traverse(fun, function(node, type) { // replace shifted name with new variable + stack.push(node); + if (node[0] === 'binary' && node[1] === '>>' && node[2][0] === 'name' && needsShift(node[2][1]) && node[3][0] === 'num') { + var name = node[2][1]; + var data = vars[name]; + var parent = stack[stack.length-2]; + // Don't modify in |x$sN = x >> 2|, in normal assigns and in var assigns + if (parent[0] === 'assign' && parent[2][0] === 'name' && parent[2][1] === name + '$s' + data.primaryShift) return; + if (parent[0] === name + '$s' + data.primaryShift) return; + if (node[3][1] === data.primaryShift) { + return ['name', name + '$s' + data.primaryShift]; + } + } + }); + cleanNotes(); + var SIMPLE_SHIFTS = set('<<', '>>'); + var more = true; + while (more) { // combine shifts in the same direction as an optimization + more = false; + traverse(fun, function(node, type) { + if (node[0] === 'binary' && node[1] in SIMPLE_SHIFTS && node[2][0] === 'binary' && node[2][1] === node[1] && + node[3][0] === 'num' && node[2][3][0] === 'num') { // do not turn a << b << c into a << b + c; while logically identical, it is slower + more = true; + return ['binary', node[1], node[2][2], ['num', node[3][1] + node[2][3][1]]]; + } + }); + } + // Before recombining, do some additional 
optimizations + traverse(fun, function(node, type) { + // Apply constant shifts onto constants + if (type === 'binary' && node[1] === '>>' && node[2][0] === 'num' && node[3][0] === 'num' && node[3][1] <= MAX_SHIFTS) { + var subNode = node[2]; + var shifts = node[3][1]; + var result = subNode[1] / Math.pow(2, shifts); + if (result % 1 === 0) { + subNode[1] = result; + return subNode; + } + } + // Optimize the case of ($a*80)>>2 into ($a*20)|0 + if (type === 'binary' && node[1] in SIMPLE_SHIFTS && + node[2][0] === 'binary' && node[2][1] === '*') { + var mulNode = node[2]; + if (mulNode[2][0] === 'num') { + var temp = mulNode[2]; + mulNode[2] = mulNode[3]; + mulNode[3] = temp; + } + if (mulNode[3][0] === 'num') { + if (node[1] === '<<') { + mulNode[3][1] *= Math.pow(2, node[3][1]); + node[1] = '|'; + node[3][1] = 0; + return node; + } else { + if (mulNode[3][1] % Math.pow(2, node[3][1]) === 0) { + mulNode[3][1] /= Math.pow(2, node[3][1]); + node[1] = '|'; + node[3][1] = 0; + return node; + } + } + } + } + /* XXX - theoretically useful optimization(s), but commented out as not helpful in practice + // Transform (x << 2) >> 2 into x & mask or something even simpler + if (type === 'binary' && node[1] === '>>' && node[3][0] === 'num' && + node[2][0] === 'binary' && node[2][1] === '<<' && node[2][3][0] === 'num' && node[3][1] === node[2][3][1]) { + var subNode = node[2]; + var shifts = node[3][1]; + var mask = ((0xffffffff << shifts) >>> shifts) | 0; + return ['binary', '&', subNode[2], ['num', mask]]; + //return ['binary', '|', subNode[2], ['num', 0]]; + //return subNode[2]; + } + */ + }); + // Re-combine remaining shifts, to undo the breaking up we did before. 
may require reordering inside +'s + var stack = []; + traverse(fun, function(node, type) { + stack.push(node); + if (type === 'binary' && node[1] === '+' && (stack[stack.length-2][0] != 'binary' || stack[stack.length-2][1] !== '+')) { + // 'Flatten' added items + var addedItems = []; + function flatten(node) { + if (node[0] === 'binary' && node[1] === '+') { + flatten(node[2]); + flatten(node[3]); + } else { + addedItems.push(node); + } + } + flatten(node); + var originalOrder = addedItems.slice(); + function key(node) { // a unique value for all relevant shifts for recombining, non-unique for stuff we don't need to bother with + function originalOrderKey(item) { + return -originalOrder.indexOf(item); + } + if (node[0] === 'binary' && node[1] in SIMPLE_SHIFTS) { + if (node[3][0] === 'num' && node[3][1] <= MAX_SHIFTS) return 2*node[3][1] + (node[1] === '>>' ? 100 : 0); // 0-106 + return (node[1] === '>>' ? 20000 : 10000) + originalOrderKey(node); + } + if (node[0] === 'num') return -20000 + node[1]; + return -10000 + originalOrderKey(node); // Don't modify the original order if we don't modify anything + } + for (var i = 0; i < addedItems.length; i++) { + if (addedItems[i][0] === 'string') return; // this node is not relevant for us + } + addedItems.sort(function(node1, node2) { + return key(node1) - key(node2); + }); + // Regenerate items, now sorted + var i = 0; + while (i < addedItems.length-1) { // re-combine inside addedItems + var k = key(addedItems[i]), k1 = key(addedItems[i+1]); + if (k === k1 && k >= 0 && k1 <= 106) { + addedItems[i] = ['binary', addedItems[i][1], ['binary', '+', addedItems[i][2], addedItems[i+1][2]], addedItems[i][3]]; + addedItems.splice(i+1, 1); + } else { + i++; + } + } + var num = 0; + for (i = 0; i < addedItems.length; i++) { // combine all numbers into one + if (addedItems[i][0] === 'num') { + num += addedItems[i][1]; + addedItems.splice(i, 1); + i--; + } + } + if (num != 0) { // add the numbers into an existing shift, we + // prefer 
(x+5)>>7 over (x>>7)+5 , since >>'s result is known to be 32-bit and is more easily optimized. + // Also, in the former we can avoid the parentheses, which saves a little space (the number will be bigger, + // so it might take more space, but normally at most one more digit). + var added = false; + for (i = 0; i < addedItems.length; i++) { + if (addedItems[i][0] === 'binary' && addedItems[i][1] === '>>' && addedItems[i][3][0] === 'num' && addedItems[i][3][1] <= MAX_SHIFTS) { + addedItems[i] = ['binary', '>>', ['binary', '+', addedItems[i][2], ['num', num << addedItems[i][3][1]]], addedItems[i][3]]; + added = true; + } + } + if (!added) { + addedItems.unshift(['num', num]); + } + } + var ret = addedItems.pop(); + while (addedItems.length > 0) { // re-create AST from addedItems + ret = ['binary', '+', ret, addedItems.pop()]; + } + return ret; + } + }); + // Note finished variables + for (var name in vars) { + funFinished[name] = true; + } + } + }); +} + +function optimizeShiftsConservative(ast) { + optimizeShiftsInternal(ast, true); +} + +function optimizeShiftsAggressive(ast) { + optimizeShiftsInternal(ast, false); +} + // We often have branchings that are simplified so one end vanishes, and // we then get // if (!(x < 5)) @@ -5276,6 +5667,8 @@ var passes = { removeAssignsToUndefined: removeAssignsToUndefined, //removeUnneededLabelSettings: removeUnneededLabelSettings, simplifyExpressions: simplifyExpressions, + optimizeShiftsConservative: optimizeShiftsConservative, + optimizeShiftsAggressive: optimizeShiftsAggressive, simplifyIfs: simplifyIfs, hoistMultiples: hoistMultiples, loopOptimizer: loopOptimizer, diff --git a/tools/system_libs.py b/tools/system_libs.py index 8969b98f46b11..caed6acdfecb3 100644 --- a/tools/system_libs.py +++ b/tools/system_libs.py @@ -234,6 +234,7 @@ def create_libcextra(): 'tgammal.c' ]], ['misc', [ + 'ffs.c', 'getopt.c', 'getopt_long.c', ]], diff --git a/tools/test-js-optimizer-t2-output.js b/tools/test-js-optimizer-t2-output.js new file 
mode 100644 index 0000000000000..691d0a66b90be --- /dev/null +++ b/tools/test-js-optimizer-t2-output.js @@ -0,0 +1,108 @@ +function shifty($id2) { + var c1$s2; + var $tp$s2; + var $parameters_addr$s2; + var $wavelet38$s2; + var _dwt_norms_real$s2; + var $a_addr$s2; + var _idents$s2; + var $id3$s3; + var $id2$s1 = $id2 >> 1; + q(HEAP32[$id >> 2]); + q(HEAP32[$id + 40 >> 2]); + q(HEAP32[$id + 80 >> 2]); + q(HEAP32[unknown1 + unknown2 + $id >> 2]); + q(HEAP32[unknown1 + $id + unknown2 >> 2]); + var localUnchanged1 = get(1), localUnchanged2 = get(1); + q(HEAP32[localUnchanged1 + $id + localUnchanged2 >> 2]); + q($id >> _something_); + $id = q(".."); + q($id << _somethingElse_); + pause(-1); + q(HEAP32[$id2$s1]); + q(HEAP32[$id2$s1]); + q(HEAP32[$id2$s1]); + q(HEAP32[$id2$s1]); + q(HEAP32[$id2$s1 + 20]); + q(HEAP32[$id2$s1 + 40]); + var $id3 = get(74), $id3$s3 = $id3 >> 3; + q(HEAP32[$id3$s3]); + q(HEAP32[$id3$s3 + 5]); + q(HEAP32[$id3$s3 + 10]); + q($id3); + pause(0); + var _idents = get("abc"), _idents$s2 = _idents >> 2; + q(HEAP32[HEAP32[_idents$s2] + 8 >> 2]); + q(HEAP32[HEAP32[_idents$s2] + 8 >> 2]); + q(HEAP32[HEAP32[_idents$s2] + 8 >> 2]); + pause(1); + var $sn_addr = get(12), $a_addr = get(999), $a_addr$s2 = $a_addr >> 2; + var $i = get(112233); + q(HEAP32[(($sn_addr - 1 << 1) + 1 << 2 >> 2) + $a_addr$s2]); + q(HEAP32[(($i - 1 << 1) + 1 << 2 >> 2) + $a_addr$s2]); + q(HEAP32[($i << 3 >> 2) + $a_addr$s2]); + q(HEAP32[($i << 2 >> 2) + $a_addr$s2]); + q($a_addr$s2, z($a_addr$s2)); + pause(2); + var $level = HEAP[get(322) >> 2]; + var _dwt_norms_real = get("a"), _dwt_norms_real$s2 = _dwt_norms_real >> 2, $orient = get("cheez"); + q(HEAP32[($level << 3 >> 2) + _dwt_norms_real$s2 + ($orient * 20 | 0)]); + q(HEAP32[(($level << 3) + 4 >> 2) + _dwt_norms_real$s2 + ($orient * 20 | 0)]); + q(HEAP32[(($level << 3) + 8 >> 2) + _dwt_norms_real$s2 + ($orient * 20 | 0)]); + pause(3); + var $wavelet38 = get(38), $wavelet38$s2 = $wavelet38 >> 2; + $k = $a_addr; + 
q(HEAPF32[HEAP32[$wavelet38$s2] + ($k << 4) + 8 >> 2]); + q(HEAPF32[HEAP32[$wavelet38$s2] + ($k << 4) + 12 >> 2]); + q(HEAPF32[HEAP32[$wavelet38$s2] + ($k << 4) + 400 >> 2]); + pause(4); + var $p = $k, $parameters_addr = get("burger"), $parameters_addr$s2 = $parameters_addr >> 2; + q(HEAP32[(($p << 2) + 5624 >> 2) + $parameters_addr$s2]); + q(HEAP32[(($p << 2) + 5644 >> 2) + $parameters_addr$s2]); + q(HEAP32[(($p << 2) + 5664 >> 2) + $parameters_addr$s2]); + pause(5); + var $res_spec242 = get($real), $cp = get("b"), $tileno = arguments[2]; + q(HEAP32[(($res_spec242 - 1 << 2) + 5624 >> 2) + $parameters_addr$s2]); + q(HEAP32[(HEAP32[$cp + 108 >> 2] + 420 >> 2) + ($tileno * 1397 | 0)]); + pause(6); + q($idx << 3); + q(1 << $idx << 1); + print(INDENT + "Entering: _main" + "hi"); + pause(7); + var $tp = get("tp"), $tp$s2 = $tp >> 2; + q($tp$s2); + q($tp$s2); + q($tp$s2); + HEAP32[$H400] = $tp; + HEAP32[$tp] = 5; + HEAP32[$tp$s2] = 5; + HEAP32[HEAP[$tp$s2]] = 5; + HEAP32[HEAP[$tp$s2] >> 2] = 5; + pause(7); + q(go() >> 1 << 1); + q(go() << 1 >> 1); + q(go() >> 2); + q(go() << 2); + q(go() >> 8 << 8); + q(go() << 8 >> 8); + q(go() >> 16); + q(go() << 16); + q(go() + 2 >> 2); + var c1 = get(), c1$s2 = c1 >> 2; + HEAP32[c1$s2] = 1; + HEAP32[c1$s2 + 1] = 1; + HEAP32[(get() << 2 >> 2) + c1$s2] = 1; + var c2 = get(); + HEAP32[c2 >> 2] = 1; + HEAP32[c2 + 3 >> 2] = 1; + HEAP32[c2 + (get() << 2) >> 2] = 1; + var c3 = get(); + HEAP32[c3 >> 2] = 1; + HEAP32[c3 + 4 >> 2] = 1; + HEAP32[(get() << 1) + c3 >> 2] = 1; + var c4 = get(), c5 = get(); + HEAP32[c4 >> 2] = 1; + HEAP32[c4 + 4 >> 2] = 1; + HEAP32[c4 + c5 >> 2] = 1; +} + diff --git a/tools/test-js-optimizer-t2.js b/tools/test-js-optimizer-t2.js new file mode 100644 index 0000000000000..d3b2699646209 --- /dev/null +++ b/tools/test-js-optimizer-t2.js @@ -0,0 +1,109 @@ +// TODO also with >> 1 and >> 3 +// also HEAP*U*, and HEAP8, 16 +function shifty($id2) { + // $id is a non-ssa, $id2 is a param. 
both should be replaced with a shifted version + q(HEAP32[$id >> 2]); + q(HEAP32[($id + 40) >> 2]); + q(HEAP32[($id + 80 | 0) >> 2]); + q(HEAP32[(unknown1 + unknown2 + $id) >> 2]); + q(HEAP32[(unknown1 + $id + unknown2) >> 2]); // unknowns should be shifted together + var localUnchanged1 = get(1), localUnchanged2 = get(1); + q(HEAP32[(localUnchanged1 + $id + localUnchanged2) >> 2]); // unknowns should be shifted together + q($id >> _something_); // non-fixed shift + $id = q('..'); + q($id << _somethingElse_); // non-fixed shift + pause(-1); + q(HEAP32[$id2 >> 1]); + q(HEAP32[$id2 >> 1]); + q(HEAP32[$id2 >> 1]); + q(HEAP32[$id2 >> 1]); + q(HEAP32[($id2 + 40) >> 1]); + q(HEAP32[($id2 + 80 | 0) >> 1]); + var $id3 = get(74); + q(HEAP32[$id3 >> 3]); + q(HEAP32[($id3 + 40) >> 3]); + q(HEAP32[($id3 + 80 | 0) >> 3]); + q($id3); + pause(0); + // similar, but inside another HEAP + var _idents = get('abc'); + q(HEAP32[(HEAP32[_idents >> 2] + 8 | 0) >> 2]); + q(HEAP32[(HEAP32[_idents >> 2] + 8 | 0) >> 2]); + q(HEAP32[(HEAP32[_idents >> 2] + 8 | 0) >> 2]); + pause(1); + // $i's shifts should consolidate (the last should be 0..? + // since we may have had |0 in the middle!) 
+ var $sn_addr = get(12), $a_addr = get(999); + var $i = get(112233); + q(HEAP32[($a_addr + ((($sn_addr - 1 << 1) + 1 | 0) << 2) | 0) >> 2]); + q(HEAP32[($a_addr + ((($i - 1 << 1) + 1 | 0) << 2) | 0) >> 2]); + q(HEAP32[($a_addr + (($i << 1 | 0) << 2) | 0) >> 2]); + q(HEAP32[($a_addr + ($i << 2)) >> 2]); + q($a_addr >> 2, z($a_addr >> 2)); + pause(2); + var $level = HEAP[get(322) >> 2]; // ignore this + var _dwt_norms_real = get('a'), $orient = get('cheez'); + q(HEAP32[(_dwt_norms_real + $orient * 80 + ($level << 3) | 0) >> 2]); + q(HEAP32[(_dwt_norms_real + $orient * 80 + ($level << 3) + 4 | 0) >> 2]); + q(HEAP32[(_dwt_norms_real + $orient * 80 + ($level << 3) + 8 | 0) >> 2]); + pause(3); + // reuse $a_addr here + var $wavelet38 = get(38); + $k = $a_addr; + q(HEAPF32[(HEAP32[$wavelet38 >> 2] + ($k << 4) + 8 | 0) >> 2]); + q(HEAPF32[(HEAP32[$wavelet38 >> 2] + ($k << 4) + 12 | 0) >> 2]); + q(HEAPF32[(HEAP32[$wavelet38 >> 2] + ($k << 4) + 400 | 0) >> 2]); + pause(4); + // reuse $k, which already reuses $a_addr + var $p = $k, $parameters_addr = get('burger') + q(HEAP32[($parameters_addr + 5624 + ($p << 2) | 0) >> 2]); + q(HEAP32[($parameters_addr + 5644 + ($p << 2) | 0) >> 2]); + q(HEAP32[($parameters_addr + 5664 + ($p << 2) | 0) >> 2]); + pause(5); + // loops count as more uses! 
+ var $res_spec242 = get($real), $cp = get('b'), $tileno = arguments[2]; + q(HEAP32[($parameters_addr + 5624 + (($res_spec242 - 1 | 0) << 2) | 0) >> 2]); + q(HEAP32[(HEAP32[($cp + 108 | 0) >> 2] + $tileno * 5588 + 420 | 0) >> 2]); + pause(6); + q($idx << 1 << 2); + q(1 << $idx << 1); // Do not turn this into the slower 1 << $idx + 1 (which is identical though) + print(INDENT + "Entering: _main" + "hi"); // this order should not be modified + pause(7); + var $tp = get('tp'); + q($tp >> 2); + q($tp >> 2); + q($tp >> 2); + HEAP32[$H400] = $tp; + HEAP32[$tp] = 5; + HEAP32[$tp >> 2] = 5; + HEAP32[HEAP[$tp >> 2]] = 5; + HEAP32[HEAP[$tp >> 2] >> 2] = 5; + pause(7); + q(go() >> 1 << 1); + q(go() << 1 >> 1); + q(go() >> 1 >> 1); + q(go() << 1 << 1); + q(go() >> 8 << 8); + q(go() << 8 >> 8); + q(go() >> 8 >> 8); + q(go() << 8 << 8); + q((go() + 2) >> 2); // the 2 >> 2 can't be simplified + // only values provable to have lower bits clear are ok + var c1 = get(); // ok + HEAP32[c1 >> 2] = 1; + HEAP32[c1 + 4 >> 2] = 1; + HEAP32[c1 + (get() << 2) >> 2] = 1; + var c2 = get(); // bad constant + HEAP32[c2 >> 2] = 1; + HEAP32[c2 + 3 >> 2] = 1; + HEAP32[c2 + (get() << 2) >> 2] = 1; + var c3 = get(); // bad revshift + HEAP32[c3 >> 2] = 1; + HEAP32[c3 + 4 >> 2] = 1; + HEAP32[c3 + (get() << 1) >> 2] = 1; + var c4 = get(), c5 = get(); // bad unknown var + HEAP32[c4 >> 2] = 1; + HEAP32[c4 + 4 >> 2] = 1; + HEAP32[c4 + c5 >> 2] = 1; +} +// EMSCRIPTEN_GENERATED_FUNCTIONS: ["shifty"] diff --git a/tools/test-js-optimizer-t2c-output.js b/tools/test-js-optimizer-t2c-output.js new file mode 100644 index 0000000000000..43cdf889b2f43 --- /dev/null +++ b/tools/test-js-optimizer-t2c-output.js @@ -0,0 +1,17 @@ +function shifty() { + $pPage = HEAP32[$pCur_addr + ($26 << 16 >> 16 << 2) + 116 >> 2]; + var $ead_192394 = HEAP32[$pCur_addr + ($26 << 16 >> 16 << 2) + 116 >> 2]; + $pPage2 = HEAP32[($26 << 16 >> 16 << 2) + $pCur_addr + 116]; + var $ead_192394b = HEAP32[($26 << 16 >> 16 << 2) + $pCur_addr + 
116]; + $pPage2 = HEAP32[($26 << 16 >> 16) + $pCur_addr + 116]; + var $ead_192394b = HEAP32[($26 << 16 >> 16) + $pCur_addr + 116]; + q(4); + q($13 + 8 >> 2); + q($13 + 28 >> 2); + q($13 + 60 >> 2); + q($13 + $15 + 12 >> 2); + q(HEAPF32[$output + ($j37 << 4) + 4 >> 2]); + q($13 + 13 << 2); + q(h() >> 2 << 2); +} + diff --git a/tools/test-js-optimizer-t2c.js b/tools/test-js-optimizer-t2c.js new file mode 100644 index 0000000000000..85292ba57ecb4 --- /dev/null +++ b/tools/test-js-optimizer-t2c.js @@ -0,0 +1,18 @@ +function shifty() { + $pPage = HEAP32[$pCur_addr + 116 + ($26 << 16 >> 16 << 2) >> 2]; + var $ead_192394 = HEAP32[$pCur_addr + 116 + ($26 << 16 >> 16 << 2) >> 2]; + $pPage2 = HEAP32[$pCur_addr + 116 + ($26 << 16 >> 16 << 2)]; + var $ead_192394b = HEAP32[$pCur_addr + 116 + ($26 << 16 >> 16 << 2)]; + $pPage2 = HEAP32[$pCur_addr + 116 + ($26 << 16 >> 16)]; + var $ead_192394b = HEAP32[$pCur_addr + 116 + ($26 << 16 >> 16)]; + // We prefer to do additions then shifts, so the shift happens last, because the shift output is known to be 32-bit. 
So these should not change + q(16 >> 2); + q($13 + 8 >> 2); + q(28 + $13 >> 2); + q(48 + $13 + 12 >> 2); + q($13 + $15 + 12 >> 2); + q(HEAPF32[$output + ($j37 << 4) + 4 >> 2]); + q(5 + $13 + 8 << 2); + q(((h() | 0) >> 2) << 2); // removing the shifts is dangerous +} +// EMSCRIPTEN_GENERATED_FUNCTIONS: ["shifty"] diff --git a/tools/test-js-optimizer-t3-output.js b/tools/test-js-optimizer-t3-output.js new file mode 100644 index 0000000000000..9a16c2accb96a --- /dev/null +++ b/tools/test-js-optimizer-t3-output.js @@ -0,0 +1,49 @@ +function _png_create_write_struct_2($user_png_ver, $error_ptr, $error_fn, $warn_fn, $mem_ptr, $malloc_fn, $free_fn) { + var $png_ptr$s2; + var label; + label = 2; + var setjmpTable = { + "2": (function(value) { + label = 5; + $call1 = value; + }), + dummy: 0 + }; + while (1) try { + switch (label) { + case 2: + var $png_ptr; + var $call = _png_create_struct(1); + $png_ptr = $call; + var $call1 = (HEAP32[$png_ptr >> 2] = label, 0); + label = 5; + break; + case 5: + var $2 = $png_ptr; + if (($call1 | 0) == 0) { + label = 4; + break; + } else { + label = 3; + break; + } + case 3: + var $4 = HEAP32[($png_ptr >> 2) + (148 >> 2)]; + _png_free($2, $4); + HEAP32[($png_ptr >> 2) + (148 >> 2)] = 0; + _png_destroy_struct($png_ptr); + var $retval_0 = 0; + label = 4; + break; + case 4: + var $retval_0; + return $retval_0; + default: + assert(0, "bad label: " + label); + } + } catch (e) { + if (!e.longjmp) throw e; + setjmpTable[e.label](e.value); + } +} + diff --git a/tools/test-js-optimizer-t3.js b/tools/test-js-optimizer-t3.js new file mode 100644 index 0000000000000..0e02f72bc36ed --- /dev/null +++ b/tools/test-js-optimizer-t3.js @@ -0,0 +1,50 @@ +function _png_create_write_struct_2($user_png_ver, $error_ptr, $error_fn, $warn_fn, $mem_ptr, $malloc_fn, $free_fn) { + var $png_ptr$s2; + var label; + label = 2; + var setjmpTable = { + "2": (function(value) { + label = 5; + $call1 = value; + }), + dummy: 0 + }; + while (1) try { + switch (label) { + case 
2: + var $png_ptr; + var $call = _png_create_struct(1); + $png_ptr = $call; + var $call1 = (HEAP32[$png_ptr >> 2] = label, 0); + label = 5; + break; + case 5: + var $2 = $png_ptr; + if (($call1 | 0) == 0) { + label = 4; + break; + } else { + label = 3; + break; + } + case 3: + var $4 = HEAP32[($png_ptr >> 2) + (148 >> 2)]; + _png_free($2, $4); + HEAP32[($png_ptr >> 2) + (148 >> 2)] = 0; + _png_destroy_struct($png_ptr); + var $retval_0 = 0; + label = 4; + break; + case 4: + var $retval_0; + return $retval_0; + default: + assert(0, "bad label: " + label); + } + } catch (e) { + if (!e.longjmp) throw e; + setjmpTable[e.label](e.value); + } +} +// EMSCRIPTEN_GENERATED_FUNCTIONS: ["_png_create_write_struct_2"] +