diff --git a/AUTHORS b/AUTHORS index 6ab0b936eacef..26c8c24f799d2 100644 --- a/AUTHORS +++ b/AUTHORS @@ -193,3 +193,4 @@ a license to everyone to use it as detailed in LICENSE.) * Tim Guan-tin Chien * Krzysztof Jakubowski * Vladimír Vondruš +* Brion Vibber diff --git a/emar b/emar index adb2858202648..ac013038b44a4 100755 --- a/emar +++ b/emar @@ -40,7 +40,7 @@ if len(newargs) > 2: parts = base_name.split('.') parts[0] += '_' + h newname = '.'.join(parts) - full_newname = os.path.relpath(os.path.join(dir_name, newname)) + full_newname = os.path.join(dir_name, newname) if not os.path.exists(full_newname): try: # it is ok to fail here, we just don't get hashing shutil.copyfile(orig_name, full_newname) diff --git a/emcc b/emcc index cfec4a0effe12..7255407420137 100755 --- a/emcc +++ b/emcc @@ -172,7 +172,8 @@ There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR P exit(0) elif len(sys.argv) == 2 and sys.argv[1] == '-v': # -v with no inputs - print 'emcc (Emscripten gcc/clang-like replacement) %s' % shared.EMSCRIPTEN_VERSION + # autoconf likes to see 'GNU' in the output to enable shared object support + print 'emcc (Emscripten gcc/clang-like replacement + linker emulating GNU ld) %s' % shared.EMSCRIPTEN_VERSION code = subprocess.call([shared.CLANG, '-v']) shared.check_sanity(force=True) exit(code) @@ -1286,6 +1287,10 @@ try: # Emscripten logging.debug('LLVM => JS') extra_args = [] if not js_libraries else ['--libraries', ','.join(map(os.path.abspath, js_libraries))] + if memory_init_file: + shared.Settings.MEM_INIT_METHOD = 1 + else: + assert shared.Settings.MEM_INIT_METHOD != 1 final = shared.Building.emscripten(final, append_ext=False, extra_args=extra_args) if DEBUG: save_intermediate('original') @@ -1337,18 +1342,25 @@ try: js_transform_tempfiles = [final] - if memory_init_file: + if shared.Settings.MEM_INIT_METHOD > 0: memfile = target + '.mem' shared.try_delete(memfile) def repl(m): # handle chunking of the memory initializer - s = 
m.groups(0)[0] - if len(s) == 0 and not shared.Settings.EMTERPRETIFY: return m.group(0) # emterpreter must have a mem init file; otherwise, don't emit 0-size ones - open(memfile, 'wb').write(''.join(map(lambda x: chr(int(x or '0')), s.split(',')))) + s = m.group(1) + if len(s) == 0: return '' # don't emit 0-size ones + membytes = [int(x or '0') for x in s.split(',')] + while membytes and membytes[-1] == 0: + membytes.pop() + if not membytes: return '' + if not memory_init_file: + # memory initializer in a string literal + return "memoryInitializer = '%s';" % shared.JS.generate_string_initializer(list(membytes)) + open(memfile, 'wb').write(''.join(map(chr, membytes))) if DEBUG: # Copy into temp dir as well, so can be run there too shared.safe_copy(memfile, os.path.join(shared.get_emscripten_temp_dir(), os.path.basename(memfile))) - return 'var memoryInitializer = "%s";' % os.path.basename(memfile) + return 'memoryInitializer = "%s";' % os.path.basename(memfile) src = re.sub(shared.JS.memory_initializer_pattern, repl, open(final).read(), count=1) open(final + '.mem.js', 'w').write(src) final += '.mem.js' @@ -1562,7 +1574,7 @@ try: try: # move temp js to final position, alongside its mem init file shutil.move(final, js_target) - args = [shared.PYTHON, shared.path_from_root('tools', 'emterpretify.py'), js_target, final + '.em.js', json.dumps(shared.Settings.EMTERPRETIFY_BLACKLIST), json.dumps(shared.Settings.EMTERPRETIFY_WHITELIST), json.dumps(shared.Settings.EMTERPRETIFY_YIELDLIST), str(shared.Settings.SWAPPABLE_ASM_MODULE)] + args = [shared.PYTHON, shared.path_from_root('tools', 'emterpretify.py'), js_target, final + '.em.js', json.dumps(shared.Settings.EMTERPRETIFY_BLACKLIST), json.dumps(shared.Settings.EMTERPRETIFY_WHITELIST), '', str(shared.Settings.SWAPPABLE_ASM_MODULE)] if shared.Settings.EMTERPRETIFY_ASYNC: args += ['ASYNC=1'] if shared.Settings.EMTERPRETIFY_ADVISE: diff --git a/emscripten-version.txt b/emscripten-version.txt index fa911e3b71b7c..1368c976c81f7 
100644 --- a/emscripten-version.txt +++ b/emscripten-version.txt @@ -1,2 +1,2 @@ -1.33.2 +1.34.0 diff --git a/emscripten.py b/emscripten.py index 53976cbaba508..5a7a245ef73b7 100755 --- a/emscripten.py +++ b/emscripten.py @@ -669,13 +669,13 @@ def math_fix(g): receiving = '' if settings['ASSERTIONS']: # assert on the runtime being in a valid state when calling into compiled code. The only exceptions are - # some support code like malloc TODO: verify that malloc is actually safe to use that way + # some support code receiving = '\n'.join(['var real_' + s + ' = asm["' + s + '"]; asm["' + s + '''"] = function() { assert(runtimeInitialized, 'you need to wait for the runtime to be ready (e.g. wait for main() to be called)'); assert(!runtimeExited, 'the runtime was exited (use NO_EXIT_RUNTIME to keep it alive after main() exits)'); return real_''' + s + '''.apply(null, arguments); }; -''' for s in exported_implemented_functions if s not in ['_malloc', '_free', '_memcpy', '_memset', 'runPostSets']]) +''' for s in exported_implemented_functions if s not in ['_memcpy', '_memset', 'runPostSets', '_emscripten_replace_memory']]) if not settings['SWAPPABLE_ASM_MODULE']: receiving += ';\n'.join(['var ' + s + ' = Module["' + s + '"] = asm["' + s + '"]' for s in exported_implemented_functions + function_tables]) diff --git a/site/source/conf.py b/site/source/conf.py index cf2e4e313362b..30658d3ea1a08 100644 --- a/site/source/conf.py +++ b/site/source/conf.py @@ -180,7 +180,7 @@ # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -html_favicon = 'emscripten.ico' +html_favicon = '_static/emscripten.ico' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, diff --git a/site/source/docs/api_reference/emscripten.h.rst b/site/source/docs/api_reference/emscripten.h.rst index a03d13dfdaafd..a33abcd4e4c9c 100644 --- a/site/source/docs/api_reference/emscripten.h.rst +++ b/site/source/docs/api_reference/emscripten.h.rst @@ -247,7 +247,7 @@ Functions :param int mode: The timing mode to use. Allowed values are EM_TIMING_SETTIMEOUT, EM_TIMING_RAF. - :param int value: The timing value to activate for the main loop. This value interpreted differently according to the ``mode``parameter: + :param int value: The timing value to activate for the main loop. This value interpreted differently according to the ``mode`` parameter: - If ``mode`` is EM_TIMING_SETTIMEOUT, then ``value`` specifies the number of milliseconds to wait between subsequent ticks to the main loop and updates occur independent of the vsync rate of the display (vsync off). This method uses the JavaScript ``setTimeout`` function to drive the animation. - If ``mode`` is EM_TIMING_RAF, then updates are performed using the ``requestAnimationFrame`` function (with vsync enabled), and this value is interpreted as a "swap interval" rate for the main loop. The value of ``1`` specifies the runtime that it should render at every vsync (typically 60fps), whereas the value ``2`` means that the main loop callback should be called only every second vsync (30fps). As a general formula, the value ``n`` means that the main loop is updated at every n'th vsync, or at a rate of ``60/n`` for 60Hz displays, and ``120/n`` for 120Hz displays. @@ -263,9 +263,10 @@ Functions Returns the current main loop timing mode that is in effect. For interpretation of the values, see the documentation of the function :c:func:`emscripten_set_main_loop_timing`. The timing mode is controlled by calling the functions :c:func:`emscripten_set_main_loop_timing` and :c:func:`emscripten_set_main_loop`. 
- :param int *mode: If not null, the used timing mode is returned here. - - :param int *value: If not null, the used timing value is returned here. + :param mode: If not null, the used timing mode is returned here. + :type mode: int* + :param value: If not null, the used timing value is returned here. + :type value: int* .. c:function:: void emscripten_set_main_loop_expected_blockers(int num) diff --git a/site/source/docs/api_reference/html5.h.rst b/site/source/docs/api_reference/html5.h.rst index 2de30fccc78e3..916da859900df 100644 --- a/site/source/docs/api_reference/html5.h.rst +++ b/site/source/docs/api_reference/html5.h.rst @@ -1207,7 +1207,8 @@ Functions .. note:: This function makes changes to the DOM to satisfy consistent presentation across browsers. These changes have been designed to intrude as little as possible, and the changes are cleared once windowed browsing is restored. If any of these changes are conflicting, see the function :c:func:`emscripten_request_fullscreen` instead, which performs a bare fullscreen request without any modifications to the DOM. - :param const EmscriptenFullscreenStrategy *fullscreenStrategy: [in] Points to a configuration structure filled by the caller which specifies display options for the fullscreen mode. + :param fullscreenStrategy: [in] Points to a configuration structure filled by the caller which specifies display options for the fullscreen mode. + :type fullscreenStrategy: const EmscriptenFullscreenStrategy* .. c:function:: EMSCRIPTEN_RESULT emscripten_exit_fullscreen(void) diff --git a/site/source/docs/api_reference/preamble.js.rst b/site/source/docs/api_reference/preamble.js.rst index 5970282e1a36d..fa7a8a177642a 100644 --- a/site/source/docs/api_reference/preamble.js.rst +++ b/site/source/docs/api_reference/preamble.js.rst @@ -52,7 +52,9 @@ Calling compiled C functions from JavaScript :param ident: The name of the C function to be called. :param returnType: The return type of the function. 
This can be ``"number"``, ``"string"`` or ``"array"``, which correspond to the appropriate JavaScript types (use ``"number"`` for any C pointer, and ``"array"`` for JavaScript arrays and typed arrays; note that arrays are 8-bit), or for a void function it can be ``null`` (note: the JavaScript ``null`` value, not a string containing the word "null"). + .. note:: 64-bit integers become two 32-bit parameters, for the low and high bits (since 64-bit integers cannot be represented in JavaScript numbers). + :param argTypes: An array of the types of arguments for the function (if there are no arguments, this can be omitted). Types are as in ``returnType``, except that ``array`` is not supported as there is no way for us to know the length of the array). :param args: An array of the arguments to the function, as native JavaScript values (as in ``returnType``). Note that string arguments will be stored on the stack (the JavaScript string will become a C string on the stack). :returns: The result of the function call as a native JavaScript value (as in ``returnType``). diff --git a/site/source/docs/building_from_source/verify_emscripten_environment.rst b/site/source/docs/building_from_source/verify_emscripten_environment.rst index 0cadc5d248004..c700287fd3801 100644 --- a/site/source/docs/building_from_source/verify_emscripten_environment.rst +++ b/site/source/docs/building_from_source/verify_emscripten_environment.rst @@ -98,6 +98,6 @@ Other common problems to check for are: python emcc -.. COMMENT:: **HamishW** Need to clarify if this last point on Python2 is Linux/Mac only, and if not, what needs to be done on Windows. +.. COMMENT : **HamishW** Need to clarify if this last point on Python2 is Linux/Mac only, and if not, what needs to be done on Windows. If none of the above is helpful, then please :ref:`contact us ` for help. 
diff --git a/site/source/docs/getting_started/test-suite.rst b/site/source/docs/getting_started/test-suite.rst index 3f73253508935..84fcb159a1f11 100644 --- a/site/source/docs/getting_started/test-suite.rst +++ b/site/source/docs/getting_started/test-suite.rst @@ -19,7 +19,7 @@ The whole core test suite can be run using the script `tests/runner.py ` - This may take several hours. - :term:`Node.js` cannot run all of the tests in the suite; if you need to run them all, you should get a recent trunk version of the `SpiderMonkey `_ shell. On Windows you can install and activate *SpiderMonkey* using the :ref:`emsdk`. diff --git a/site/source/docs/index.rst b/site/source/docs/index.rst index a0332b099fe87..ea2165e5e52e1 100644 --- a/site/source/docs/index.rst +++ b/site/source/docs/index.rst @@ -14,7 +14,7 @@ This comprehensive documentation set contains everything you need to know to use **Emscripten Fundamentals:** - :ref:`integrating-porting-index` illustrates the main differences between the native and Emscripten runtime environments, and explains the changes you need to make to prepare your C/C++ code for the Web. -- :ref:`optimizing-index` shows how to optimise your code for size and performance. +- :ref:`Optimizing-Code` shows how to optimise your code for size and performance. - :ref:`compiling-and-running-projects-index` demonstrates how to integrate Emscripten into your existing project build system. 
**Contributing:** diff --git a/site/source/docs/introducing_emscripten/Talks-and-Publications.rst b/site/source/docs/introducing_emscripten/Talks-and-Publications.rst index c64a7ead78fe5..6b3e7a010eced 100644 --- a/site/source/docs/introducing_emscripten/Talks-and-Publications.rst +++ b/site/source/docs/introducing_emscripten/Talks-and-Publications.rst @@ -8,10 +8,14 @@ Presentations ============= - Slides from CppCon 2014: - - `Emscripten & asm.js: C++'s role in the modern web `_ (`kripken `_) - - `Video of talk `_ - - `Connecting C++ and JavaScript on the Web with Embind `_ (`chadaustin `_) - - `Video of talk `_ + + - `Emscripten & asm.js: C++'s role in the modern web `_ (`kripken `_) + + - `Video of talk `_ + + - `Connecting C++ and JavaScript on the Web with Embind `_ (`chadaustin `_) + + - `Video of talk `_ - Slides from GDC 2014: `Getting started with asm.js and Emscripten `_ (`kripken `_, `lwagner `_) - Slides from Strange Loop 2013: `Native speed on the web, JavaScript and asm.js `_ (`kripken `_) diff --git a/site/source/docs/porting/connecting_cpp_and_javascript/embind.rst b/site/source/docs/porting/connecting_cpp_and_javascript/embind.rst index c03b0335ebcf8..4194c080edaad 100644 --- a/site/source/docs/porting/connecting_cpp_and_javascript/embind.rst +++ b/site/source/docs/porting/connecting_cpp_and_javascript/embind.rst @@ -24,7 +24,7 @@ passed to JavaScript. .. tip:: In addition to the code in this article: - There are many other examples of how to use *Embind* in the `Test Suite`_. - - `Connecting C++ and JavaScript on the Web with Embind>`_ (slides from + - `Connecting C++ and JavaScript on the Web with Embind`_ (slides from CppCon 2014) contains more examples and information about *Embind*'s design philosophy and implementation. @@ -787,7 +787,7 @@ real-world applications has proved to be more than acceptable. .. _Connecting C++ and JavaScript on the Web with Embind: http://chadaustin.me/2014/09/connecting-c-and-javascript-on-the-web-with-embind/ .. 
_Boost.Python: http://www.boost.org/doc/libs/1_56_0/libs/python/doc/ .. _finalizers: http://en.wikipedia.org/wiki/Finalizer -.. _Boost.Python-like raw pointer policies`: https://wiki.python.org/moin/boost.python/CallPolicy +.. _Boost.Python-like raw pointer policies: https://wiki.python.org/moin/boost.python/CallPolicy .. _Backbone.js: http://backbonejs.org/#Model-extend .. _Web Audio API: https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API .. _Making sine, square, sawtooth and triangle waves: http://stuartmemo.com/making-sine-square-sawtooth-and-triangle-waves/ diff --git a/site/source/docs/porting/files/packaging_files.rst b/site/source/docs/porting/files/packaging_files.rst index bcbb2c3fddba2..d640db8c9a488 100644 --- a/site/source/docs/porting/files/packaging_files.rst +++ b/site/source/docs/porting/files/packaging_files.rst @@ -75,6 +75,7 @@ This model is supported by changing the :js:attr:`Module.filePackagePrefixURL` t .. _packaging-files-packaged-file-location: Modifying file locations in the virtual file system +=================================================== The default approach for packaging is to directly map the nested file structure at compile time — relative to the compile-time command prompt directory — to the root of the virtual file system. The ``@`` symbol can be used in a path at build time to *explicitly* specify where the resource will be located in the virtual file system at runtime. 
diff --git a/site/source/docs/porting/index.rst b/site/source/docs/porting/index.rst index cdbc326fbb6ea..f237ec6470bf4 100644 --- a/site/source/docs/porting/index.rst +++ b/site/source/docs/porting/index.rst @@ -15,6 +15,7 @@ The topics in this section cover the main integration points that you need to co files/index multimedia_and_graphics/index Debugging + pthreads diff --git a/src/emrun_postjs.js b/src/emrun_postjs.js index bb253aa445d67..63da3f8ece0a5 100644 --- a/src/emrun_postjs.js +++ b/src/emrun_postjs.js @@ -1,4 +1,4 @@ -if (typeof window === "object" && !ENVIRONMENT_IS_PTHREAD) { +if (typeof window === "object" && (typeof ENVIRONMENT_IS_PTHREAD === 'undefined' || !ENVIRONMENT_IS_PTHREAD)) { function emrun_register_handlers() { function post(msg) { var http = new XMLHttpRequest(); @@ -19,6 +19,7 @@ if (typeof window === "object" && !ENVIRONMENT_IS_PTHREAD) { post('^pageload^'); } } + window.addEventListener('load', emrun_register_handlers); } // POSTs the given binary data represented as a (typed) array data back to the emrun-based web server. diff --git a/src/library_async.js b/src/library_async.js index 6742cbee8ec2c..f9104f367b5d7 100644 --- a/src/library_async.js +++ b/src/library_async.js @@ -222,7 +222,7 @@ mergeInto(LibraryManager.library, { #if ASSERTIONS abortDecorators.push(function(output, what) { if (EmterpreterAsync.state !== 0) { - return output + '\nThis error happened during an emterpreter-async save or load of the stack. Was there non-emterpreted code on the stack during save (which is unallowed)? You may want to adjust EMTERPRETIFY_BLACKLIST, EMTERPRETIFY_WHITELIST, or EMTERPRETIFY_YIELDLIST (to consider certain functions ok to run during an emscripten_sleep_with_yield).\nThis is what the stack looked like when we tried to save it: ' + [EmterpreterAsync.state, EmterpreterAsync.saveStack]; + return output + '\nThis error happened during an emterpreter-async save or load of the stack. 
Was there non-emterpreted code on the stack during save (which is unallowed)? You may want to adjust EMTERPRETIFY_BLACKLIST, EMTERPRETIFY_WHITELIST.\nThis is what the stack looked like when we tried to save it: ' + [EmterpreterAsync.state, EmterpreterAsync.saveStack]; } return output; }); diff --git a/src/library_formatString.js b/src/library_formatString.js index f7b91b407e285..3d4ee14341813 100644 --- a/src/library_formatString.js +++ b/src/library_formatString.js @@ -5,7 +5,7 @@ mergeInto(LibraryManager.library, { // Returns the resulting string string as a character array. _formatString__deps: ['strlen', '_reallyNegative'], _formatString: function(format, varargs) { - assert((varargs & 7) === 0); + assert((varargs & 3) === 0); var textIndex = format; var argIndex = 0; function getNextArg(type) { diff --git a/src/library_glfw.js b/src/library_glfw.js index 52519b85f540f..631aef9d3cfb9 100644 --- a/src/library_glfw.js +++ b/src/library_glfw.js @@ -32,7 +32,7 @@ ******************************************************************************/ var LibraryGLFW = { - $GLFW__deps: ['emscripten_get_now'], + $GLFW__deps: ['emscripten_get_now', '$GL'], $GLFW: { Window: function(id, width, height, title, monitor, share) { diff --git a/src/postamble.js b/src/postamble.js index abc9510f2d649..989c4a5a508be 100644 --- a/src/postamble.js +++ b/src/postamble.js @@ -1,6 +1,35 @@ // === Auto-generated postamble setup entry stuff === +#if MEM_INIT_METHOD == 2 +#if USE_PTHREADS +if (memoryInitializer && !ENVIRONMENT_IS_PTHREAD) (function(s) { +#else +if (memoryInitializer) (function(s) { +#endif + var i, n = s.length; +#if ASSERTIONS + n -= 4; + var crc, bit, table = new Int32Array(256); + for (i = 0; i < 256; ++i) { + for (crc = i, bit = 0; bit < 8; ++bit) + crc = (crc >>> 1) ^ ((crc & 1) * 0xedb88320); + table[i] = crc >>> 0; + } + crc = -1; + crc = table[(crc ^ n) & 0xff] ^ (crc >>> 8); + crc = table[(crc ^ (n >>> 8)) & 0xff] ^ (crc >>> 8); + for (i = 0; i < s.length; ++i) { + 
crc = table[(crc ^ s.charCodeAt(i)) & 0xff] ^ (crc >>> 8); + } + assert(crc === 0, "memory initializer checksum"); +#endif + for (i = 0; i < n; ++i) { + HEAPU8[STATIC_BASE + i] = s.charCodeAt(i); + } +})(memoryInitializer); +#else +#if MEM_INIT_METHOD == 1 #if USE_PTHREADS if (memoryInitializer && !ENVIRONMENT_IS_PTHREAD) { #else @@ -52,6 +81,8 @@ if (memoryInitializer) { } } } +#endif +#endif function ExitStatus(status) { this.name = "ExitStatus"; diff --git a/src/preamble.js b/src/preamble.js index 3c348a4ef2d7a..25c3d302d62d1 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -436,9 +436,10 @@ Module['allocate'] = allocate; // Allocate memory during any stage of startup - static memory early on, dynamic memory later, malloc when ready function getMemory(size) { if (!staticSealed) return Runtime.staticAlloc(size); - if (typeof _sbrk !== 'undefined' && !_sbrk.called) return Runtime.dynamicAlloc(size); + if ((typeof _sbrk !== 'undefined' && !_sbrk.called) || !runtimeInitialized) return Runtime.dynamicAlloc(size); return _malloc(size); } +Module['getMemory'] = getMemory; function Pointer_stringify(ptr, /* optional */ length) { if (length === 0 || !ptr) return ''; diff --git a/src/settings.js b/src/settings.js index b4868d7fd2857..ce632ea4b6d7d 100644 --- a/src/settings.js +++ b/src/settings.js @@ -35,6 +35,11 @@ var INVOKE_RUN = 1; // Whether we will run the main() function. Disable if you e // can do with Module.callMain(), with an optional parameter of commandline args). var NO_EXIT_RUNTIME = 0; // If set, the runtime is not quit when main() completes (allowing code to // run afterwards, for example from the browser main event loop). +var MEM_INIT_METHOD = 0; // How to represent the initial memory content. 
+ // 0: keep array literal representing the initial memory data + // 1: create a *.mem file containing the binary data of the initial memory; + // use the --memory-init-file command line switch to select this method + // 2: embed a string literal representing that initial memory data var TOTAL_STACK = 5*1024*1024; // The total stack size. There is no way to enlarge the stack, so this // value must be large enough for the program's requirements. If // assertions are on, we will assert on not exceeding this, otherwise, @@ -464,22 +469,11 @@ var EMTERPRETIFY = 0; // Runs tools/emterpretify on the compiler output var EMTERPRETIFY_BLACKLIST = []; // Functions to not emterpret, that is, to run normally at full speed var EMTERPRETIFY_WHITELIST = []; // If this contains any functions, then only the functions in this list // are emterpreted (as if all the rest are blacklisted; this overrides the BLACKLIST) -var EMTERPRETIFY_YIELDLIST = []; // A list of functions that are allowed to run during while sleeping. Typically this is - // during emscripten_sleep_with_yield , but also you may need to add methods to this list - // for things like event handling (an SDL EventHandler will be called from the event, directly - - // if we do that later, you lose out on the whole point of an EventHandler, which is to let - // you react to key presses in order to launch fullscreen, etc.). - // Functions in the yield list do not trigger asserts checking on running during a sleep, - // in ASSERTIONS builds, var EMTERPRETIFY_ASYNC = 0; // Allows sync code in the emterpreter, by saving the call stack, doing an async delay, and resuming it var EMTERPRETIFY_ADVISE = 0; // Performs a static analysis to suggest which functions should be run in the emterpreter, as it // appears they can be on the stack when a sync function is called in the EMTERPRETIFY_ASYNC option. // After showing the suggested list, compilation will halt. 
You can apply the provided list as an // emcc argument when compiling later. - // This will also advise on the YIELDLIST, if it contains at least one value (it then reports - // all things reachable from that function, as they may need to be in the YIELDLIST as well). - // Note that this depends on things like inlining. If you run this with different inlining than - // when you use the list, it might not work. var RUNNING_JS_OPTS = 0; // whether js opts will be run, after the main compiler var BOOTSTRAPPING_STRUCT_INFO = 0; // whether we are in the generate struct_info bootstrap phase diff --git a/tests/cases/fcmp_constexpr.ll b/tests/cases/fcmp_constexpr.ll new file mode 100644 index 0000000000000..4bfa013ce8ceb --- /dev/null +++ b/tests/cases/fcmp_constexpr.ll @@ -0,0 +1,17 @@ +; ModuleID = 'tests/hello_world.bc' +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +@.str = private unnamed_addr constant [20 x i8] c"hello, world %.2f!\0A\00", align 1 + +declare i32 @printf(i8*, ...) + +define i32 @main() { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %waka = select i1 fcmp ult (float fadd (float fmul (float undef, float 1.0), float 2.0), float 3.0), double 4.0, double 5.0 + %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i32 0, i32 0), double %waka) + ret i32 0 +} + diff --git a/tests/cases/fcmp_constexpr.txt b/tests/cases/fcmp_constexpr.txt new file mode 100644 index 0000000000000..ca96a9dcd43eb --- /dev/null +++ b/tests/cases/fcmp_constexpr.txt @@ -0,0 +1 @@ +hello, world 4.00! 
diff --git a/tests/emscripten_log/emscripten_log.cpp b/tests/emscripten_log/emscripten_log.cpp index 1a2f3c23c8a79..69760b9cf1b16 100644 --- a/tests/emscripten_log/emscripten_log.cpp +++ b/tests/emscripten_log/emscripten_log.cpp @@ -134,6 +134,9 @@ void __attribute__((noinline)) Foo() // Arbitrary function signature to add some int main() { + int test = 123; + emscripten_log(EM_LOG_FUNC_PARAMS | EM_LOG_DEMANGLE | EM_LOG_CONSOLE, "test print %d\n", test); + Foo(); #ifdef REPORT_RESULT REPORT_RESULT(); diff --git a/tests/fuzz/csmith_driver.py b/tests/fuzz/csmith_driver.py index 46f6ac53f0887..84214258f1f1b 100755 --- a/tests/fuzz/csmith_driver.py +++ b/tests/fuzz/csmith_driver.py @@ -118,6 +118,10 @@ def try_js(args=[]): js_args += ['-s', 'EMTERPRETIFY_WHITELIST=["_main"]'] # the opposite direction if random.random() < 0.5: js_args += ['-s', 'EMTERPRETIFY_ASYNC=1'] + if random.random() < 0.5: + js_args += ["--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2"] + if random.random() < 0.5: + js_args += ['-s', 'ASSERTIONS=1'] print '(compile)', ' '.join(js_args) open(fullname, 'a').write('\n// ' + ' '.join(js_args) + '\n\n') try: diff --git a/tests/gen_large_switchcase.py b/tests/gen_large_switchcase.py new file mode 100644 index 0000000000000..b027a43975cc4 --- /dev/null +++ b/tests/gen_large_switchcase.py @@ -0,0 +1,27 @@ +import random, sys +num_cases = int(sys.argv[1]) +cases = '' +i = 1 +for x in range(0, num_cases): + cases += ' case ' + str(i) + ': return "' + str(i) + str(i) + str(i) + '";\n' + i += random.randint(1, 5) + +print '''#include +#include +#include + +const char *foo(int x) +{ + switch(x) + { +''' + cases + ''' + default: return ""; + } +} + +int main() +{ + for(int i = 0; i < 100; ++i) + printf("%s\\n", foo((int)(emscripten_get_now() * 1000) % ''' + str(i) + ''')); + printf("Success!\\n"); +}''' diff --git a/tests/glfw_minimal.c b/tests/glfw_minimal.c new file mode 100644 index 0000000000000..dee45643ef7b2 --- /dev/null +++ b/tests/glfw_minimal.c @@ 
-0,0 +1,27 @@ +#include +#include +#include +#define GLFW_INCLUDE_ES2 +#include + +int main() { + printf("main function started\n"); + if (glfwInit() != GL_TRUE) { + printf("glfwInit() failed\n"); + glfwTerminate(); + } else { + printf("glfwInit() success\n"); + if (glfwOpenWindow(640, 480, 8, 8, 8, 8, 16, 0, GLFW_WINDOW) != GL_TRUE){ + printf("glfwOpenWindow() failed\n"); + glfwTerminate(); + } else { + printf("glfwOpenWindow() success\n"); + } + } +#ifdef REPORT_RESULT + int result = 1; + REPORT_RESULT(); +#endif + return EXIT_SUCCESS; +} + diff --git a/tests/meminit_pairs.c b/tests/meminit_pairs.c new file mode 100644 index 0000000000000..32f087c571369 --- /dev/null +++ b/tests/meminit_pairs.c @@ -0,0 +1,18 @@ +unsigned char problematic[] = { 0x20, 0x7c, 0x02, 0x07, 0x5f, 0xa0, 0xdf }; +int main() { + unsigned char a, b; + int result = 0, i, j; + for (i = 0; i < sizeof(problematic); ++i) { + a = problematic[i] ^ 32; + for (j = 0; j < sizeof(problematic); ++j) { + b = problematic[j] ^ 32; + if (((const unsigned char)data[a][2*b]) != a || + ((const unsigned char)data[a][2*b + 1]) != b) { + result = 1; + printf("data[0x%02x][0x%03x]=%x02x\n", a, 2*b, data[a][2*b]); + printf("data[0x%02x][0x%03x]=%x02x\n", a, 2*b + 1, data[a][2*b + 1]); + } + } + } + REPORT_RESULT() +} diff --git a/tests/parallel_test_core.py b/tests/parallel_test_core.py index 94e0ed26de1d6..98aca0450f5d2 100755 --- a/tests/parallel_test_core.py +++ b/tests/parallel_test_core.py @@ -14,7 +14,7 @@ assert not os.environ.get('EM_SAVE_DIR'), 'Need separate directories to avoid the parallel tests clashing' # run slower ones first, to optimize total time -optimal_order = ['asm3i', 'asm1i', 'asm2nn', 'asm3', 'asm2', 'asm2g', 'asm2f', 'asm1', 'default'] +optimal_order = ['asm3i', 'asm1i', 'asm2nn', 'asm3', 'asm2', 'asm2m', 'asm2g', 'asm2f', 'asm1', 'default'] assert set(optimal_order) == set(test_modes), 'need to update the list of slowest modes' # set up a background thread to report progress diff --git 
a/tests/runner.py b/tests/runner.py index 98336b7c76911..ea5d7747d8003 100755 --- a/tests/runner.py +++ b/tests/runner.py @@ -38,7 +38,7 @@ def path_from_root(*pathelems): # Core test runner class, shared between normal tests and benchmarks checked_sanity = False -test_modes = ['default', 'asm1', 'asm2', 'asm3', 'asm2f', 'asm2g', 'asm1i', 'asm3i', 'asm2nn'] +test_modes = ['default', 'asm1', 'asm2', 'asm3', 'asm2f', 'asm2g', 'asm1i', 'asm3i', 'asm2m', 'asm2nn'] test_index = 0 use_all_engines = os.environ.get('EM_ALL_ENGINES') # generally js engines are equivalent, testing 1 is enough. set this @@ -62,6 +62,14 @@ def skipme(self): # used by tests we ask on the commandline to be skipped, see r def is_emterpreter(self): return False + def uses_memory_init_file(self): + if self.emcc_args is None: + return None + elif '--memory-init-file' in self.emcc_args: + return int(self.emcc_args[self.emcc_args.index('--memory-init-file')+1]) + else: + return ('-O2' in self.emcc_args or '-O3' in self.emcc_args or '-Oz' in self.emcc_args) and not Settings.SIDE_MODULE + def setUp(self): Settings.reset() self.banned_js_engines = [] @@ -252,16 +260,10 @@ def build(self, src, dirname, filename, output_processor=None, main_file=None, a output_processor(open(filename + '.o.js').read()) if self.emcc_args is not None: - if '--memory-init-file' in self.emcc_args: - memory_init_file = int(self.emcc_args[self.emcc_args.index('--memory-init-file')+1]) - else: - memory_init_file = ('-O2' in self.emcc_args or '-O3' in self.emcc_args or '-Oz' in self.emcc_args) and not Settings.SIDE_MODULE src = open(filename + '.o.js').read() - if memory_init_file: + if self.uses_memory_init_file(): # side memory init file, or an empty one in the js assert ('/* memory initializer */' not in src) or ('/* memory initializer */ allocate([]' in src) - else: - assert 'memory initializer */' in src or '/*' not in src # memory initializer comment, or cleaned-up source with no comments def validate_asmjs(self, err): if 
'uccessfully compiled asm.js code' in err and 'asm.js link error' not in err: diff --git a/tests/runtime_misuse.cpp b/tests/runtime_misuse.cpp index 7a264d600dbe6..447146bec183b 100644 --- a/tests/runtime_misuse.cpp +++ b/tests/runtime_misuse.cpp @@ -6,18 +6,13 @@ extern "C" { int noted = 0; char* EMSCRIPTEN_KEEPALIVE note(int n) { + EM_ASM_({ Module.noted = $0 }, (int)¬ed); EM_ASM_({ Module.print([$0, $1]) }, n, noted); noted += n; EM_ASM_({ Module.print(['noted is now', $0]) }, noted); return (char*)"silly-string"; } -void free(void*) { // free is valid to call even after the runtime closes, so useful as a hack here for this test - EM_ASM_({ Module.print(['reporting', $0]) }, noted); - int result = noted; - REPORT_RESULT(); -} - } int main() { diff --git a/tests/runtime_misuse_2.cpp b/tests/runtime_misuse_2.cpp index 4da63d6651db2..2c901690defcd 100644 --- a/tests/runtime_misuse_2.cpp +++ b/tests/runtime_misuse_2.cpp @@ -6,6 +6,7 @@ extern "C" { int noted = 0; char* EMSCRIPTEN_KEEPALIVE note(int n) { + EM_ASM_({ Module.noted = $0 }, (int)¬ed); EM_ASM_({ Module.print([$0, $1]) }, n, noted); noted += n; EM_ASM_({ Module.print(['noted is now', $0]) }, noted); @@ -14,8 +15,6 @@ char* EMSCRIPTEN_KEEPALIVE note(int n) { void free(void*) { // free is valid to call even after the runtime closes, so useful as a hack here for this test EM_ASM_({ Module.print(['reporting', $0]) }, noted); - int result = noted; - REPORT_RESULT(); } } diff --git a/tests/test_browser.py b/tests/test_browser.py index d38771e55d85c..852e0fddba63e 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -621,7 +621,7 @@ def test_sdl_key(self): ]: for emterps in [ [], - ['-DTEST_SLEEP', '-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-s', 'ASSERTIONS=1', '-s', 'EMTERPRETIFY_YIELDLIST=["_EventHandler"]', '-s', "SAFE_HEAP=1"] + ['-DTEST_SLEEP', '-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-s', 'ASSERTIONS=1', '-s', "SAFE_HEAP=1"] ]: print delay, defines, emterps 
open(os.path.join(self.get_dir(), 'pre.js'), 'w').write(''' @@ -1110,6 +1110,10 @@ def test_glfw(self): self.btest('glfw.c', '1', args=['-s', 'LEGACY_GL_EMULATION=1']) self.btest('glfw.c', '1', args=['-s', 'LEGACY_GL_EMULATION=1', '-s', 'USE_GLFW=2']) + def test_glfw_minimal(self): + self.btest('glfw_minimal.c', '1', args=[]) + self.btest('glfw_minimal.c', '1', args=['-s', 'USE_GLFW=2']) + def test_egl(self): open(os.path.join(self.get_dir(), 'test_egl.c'), 'w').write(self.with_report_result(open(path_from_root('tests', 'test_egl.c')).read())) @@ -1736,7 +1740,13 @@ def test_runtime_misuse(self): doDirectCall(300); } - setTimeout(Module['_free'], 1000); // free is valid to call even after the runtime closes + setTimeout(function() { + var xhr = new XMLHttpRequest(); + assert(Module.noted); + xhr.open('GET', 'http://localhost:8888/report_result?' + HEAP32[Module.noted>>2]); + xhr.send(); + setTimeout(function() { window.close() }, 1000); + }, 1000); ''' open('pre_main.js', 'w').write(r''' @@ -2396,7 +2406,7 @@ def test_emterpreter_async_sleep2(self): self.btest('emterpreter_async_sleep2.cpp', '1', args=['-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-Oz']) def test_sdl_audio_beep_sleep(self): - self.btest('sdl_audio_beep_sleep.cpp', '1', args=['-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-Os', '-s', 'ASSERTIONS=1', '-s', 'DISABLE_EXCEPTION_CATCHING=0', '-profiling', '-s', 'EMTERPRETIFY_YIELDLIST=["__Z14audio_callbackPvPhi", "__ZN6Beeper15generateSamplesIhEEvPT_i", "__ZN6Beeper15generateSamplesIsEEvPT_i"]', '-s', 'SAFE_HEAP=1']) + self.btest('sdl_audio_beep_sleep.cpp', '1', args=['-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-Os', '-s', 'ASSERTIONS=1', '-s', 'DISABLE_EXCEPTION_CATCHING=0', '-profiling', '-s', 'SAFE_HEAP=1']) def test_mainloop_reschedule(self): self.btest('mainloop_reschedule.cpp', '1', args=['-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-Os']) @@ -2514,6 +2524,12 @@ def test_dynamic_link_glemu(self): 
self.btest(self.in_dir('main.cpp'), '1', args=['-s', 'MAIN_MODULE=1', '-O2', '-s', 'LEGACY_GL_EMULATION=1', '--pre-js', 'pre.js']) + def test_memory_growth_during_startup(self): + open('data.dat', 'w').write('X' * (30*1024*1024)) + self.btest('browser_test_hello_world.c', '0', args=['-s', 'ASSERTIONS=1', '-s', 'ALLOW_MEMORY_GROWTH=1', '-s', 'TOTAL_MEMORY=10000', '-s', 'TOTAL_STACK=5000', '--preload-file', 'data.dat']) + + # pthreads tests + # Test that the emscripten_ atomics api functions work. def test_pthread_atomics(self): self.btest(path_from_root('tests', 'pthread', 'test_pthread_atomics.cpp'), expected='0', args=['-O3', '-s', 'USE_PTHREADS=1', '-s', 'PTHREAD_POOL_SIZE=8']) @@ -2637,3 +2653,26 @@ def test_pthread_file_io(self): # Test that it is possible to send a signal via calling alarm(timeout), which in turn calls to the signal handler set by signal(SIGALRM, func); def test_sigalrm(self): self.btest(path_from_root('tests', 'sigalrm.cpp'), expected='0', args=['-O3']) + + def test_meminit_pairs(self): + d = 'const char *data[] = {\n "' + d += '",\n "'.join(''.join('\\x{:02x}\\x{:02x}'.format(i, j) + for j in range(256)) for i in range(256)) + with open(path_from_root('tests', 'meminit_pairs.c')) as f: + d += '"\n};\n' + f.read() + args = ["-O2", "--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2", "-s", "ASSERTIONS=1"] + self.btest(d, expected='0', args=args + ["--closure", "0"]) + self.btest(d, expected='0', args=args + ["--closure", "0", "-g"]) + self.btest(d, expected='0', args=args + ["--closure", "1"]) + + def test_meminit_big(self): + d = 'const char *data[] = {\n "' + d += '",\n "'.join([''.join('\\x{:02x}\\x{:02x}'.format(i, j) + for j in range(256)) for i in range(256)]*256) + with open(path_from_root('tests', 'meminit_pairs.c')) as f: + d += '"\n};\n' + f.read() + assert len(d) > (1 << 27) # more than 32M memory initializer + args = ["-O2", "--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2", "-s", "ASSERTIONS=1"] + self.btest(d, expected='0', 
args=args + ["--closure", "0"]) + self.btest(d, expected='0', args=args + ["--closure", "0", "-g"]) + self.btest(d, expected='0', args=args + ["--closure", "1"]) diff --git a/tests/test_core.py b/tests/test_core.py index be17f34be11d5..7f22606f17969 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -2224,6 +2224,11 @@ def test_bigswitch(self): 35040: GL_STREAM_DRAW (0x88E0) ''', args=['34962', '26214', '35040']) + def test_biggerswitch(self): + num_cases = 2000 # TODO: Increase this to ~20000 range, since seeing autogenerated code that reaches that many cases. + switch_case, err = Popen([PYTHON, path_from_root('tests', 'gen_large_switchcase.py'), str(num_cases)], stdout=PIPE, stderr=PIPE).communicate() + self.do_run(switch_case, 'Success!') + def test_indirectbr(self): Building.COMPILER_TEST_OPTS = filter(lambda x: x != '-g', Building.COMPILER_TEST_OPTS) @@ -4374,6 +4379,13 @@ def test_strstr(self): self.do_run_from_file(src, output) def test_fnmatch(self): + # Run one test without assertions, for additional coverage + assert 'asm2m' in test_modes + if self.run_name == 'asm2m': + i = self.emcc_args.index('ASSERTIONS=1') + assert i > 0 and self.emcc_args[i-1] == '-s' + self.emcc_args[i] = 'ASSERTIONS=0' + print 'flip assertions off' test_path = path_from_root('tests', 'core', 'fnmatch') src, output = (test_path + s for s in ('.c', '.out')) self.do_run_from_file(src, output) @@ -4549,7 +4561,7 @@ def process(filename): try_delete(mem_file) self.do_run(src, ('size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\n5 bytes to dev/null: 5\nok.\n \ntexte\n', 'size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\ntexte\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\n5 bytes to dev/null: 5\nok.\n'), post_build=post, 
extra_emscripten_args=['-H', 'libc/fcntl.h']) - if '-O2' in self.emcc_args: + if self.uses_memory_init_file(): assert os.path.exists(mem_file) def test_files_m(self): @@ -5491,7 +5503,6 @@ def test(): self.emcc_args += ['-s', 'EMTERPRETIFY_WHITELIST=["_frexpl"]'] # test double call assertions test() - # Tests the full SSE1 API. def test_sse1(self): return self.skip('TODO: This test fails due to bugs #2840, #3044, #3045, #3046 and #3048 (also see #3043 and #3049)') Settings.PRECISE_F32 = 1 # SIMD currently requires Math.fround @@ -5501,6 +5512,19 @@ def test_sse1(self): self.emcc_args = orig_args + mode self.do_run(open(path_from_root('tests', 'test_sse1.cpp'), 'r').read(), 'Success!') + # Tests the full SSE1 API. + def test_sse1_full(self): + return self.skip('TODO: This test fails due to bugs #2840, #3044, #3045, #3046 and #3048 (also see #3043 and #3049)') + if SPIDERMONKEY_ENGINE not in JS_ENGINES: return self.skip('test_sse1_full requires SpiderMonkey to run.') + Popen([CLANG, path_from_root('tests', 'test_sse1_full.cpp'), '-o', 'test_sse1_full'] + get_clang_native_args(), stdout=PIPE, stderr=PIPE).communicate() + native_result, err = Popen('./test_sse1_full', stdout=PIPE, stderr=PIPE).communicate() + + Settings.PRECISE_F32 = 1 # SIMD currently requires Math.fround + orig_args = self.emcc_args + for mode in [[], ['-s', 'SIMD=1']]: + self.emcc_args = orig_args + mode + ['-I' + path_from_root('tests')] + self.do_run(open(path_from_root('tests', 'test_sse1_full.cpp'), 'r').read(), native_result) + def test_simd(self): if self.is_emterpreter(): return self.skip('todo') @@ -7001,7 +7025,8 @@ def test_emscripten_log(self): # XXX Does not work in SpiderMonkey since callstacks cannot be captured when running in asm.js, see https://bugzilla.mozilla.org/show_bug.cgi?id=947996 self.banned_js_engines = [SPIDERMONKEY_ENGINE] if '-g' not in Building.COMPILER_TEST_OPTS: Building.COMPILER_TEST_OPTS.append('-g') - self.do_run('#define RUN_FROM_JS_SHELL\n' + 
open(path_from_root('tests', 'emscripten_log', 'emscripten_log.cpp')).read(), "Success!") + Building.COMPILER_TEST_OPTS += ['-DRUN_FROM_JS_SHELL'] + self.do_run(open(path_from_root('tests', 'emscripten_log', 'emscripten_log.cpp')).read(), "Success!") def test_float_literals(self): self.do_run_from_file(path_from_root('tests', 'test_float_literals.cpp'), path_from_root('tests', 'test_float_literals.out')) @@ -7387,6 +7412,7 @@ def setUp(self): asm2g = make_run("asm2g", compiler=CLANG, emcc_args=["-O2", "-g", "-s", "ASSERTIONS=1", "-s", "SAFE_HEAP=1"]) asm1i = make_run("asm1i", compiler=CLANG, emcc_args=["-O1", '-s', 'EMTERPRETIFY=1']) asm3i = make_run("asm3i", compiler=CLANG, emcc_args=["-O3", '-s', 'EMTERPRETIFY=1']) +asm2m = make_run("asm2m", compiler=CLANG, emcc_args=["-O2", "--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2", "-s", "ASSERTIONS=1"]) # Legacy test modes - asm2nn = make_run("asm2nn", compiler=CLANG, emcc_args=["-O2"], env={"EMCC_NATIVE_OPTIMIZER": "0"}) diff --git a/tests/test_interactive.py b/tests/test_interactive.py index 13a6a7ea2529f..98e176f6c6c17 100644 --- a/tests/test_interactive.py +++ b/tests/test_interactive.py @@ -110,14 +110,11 @@ def get_freealut_library(self): if WINDOWS and Building.which('cmake'): return self.get_library('freealut', os.path.join('hello_world.bc'), configure=['cmake', '.'], configure_args=['-DBUILD_TESTS=ON']) else: - return self.get_library('freealut', os.path.join('examples', 'hello_world.bc'), make_args=['EXEEXT=.bc']) + return self.get_library('freealut', [os.path.join('examples', '.libs', 'hello_world.bc'), os.path.join('src', '.libs', 'libalut.a')], make_args=['EXEEXT=.bc']) def test_freealut(self): - programs = self.get_freealut_library() - for program in programs: - assert os.path.exists(program) - Popen([PYTHON, EMCC, '-O2', program, '-o', 'page.html']).communicate() - self.run_browser('page.html', 'You should hear "Hello World!"') + Popen([PYTHON, EMCC, '-O2'] + self.get_freealut_library() + ['-o', 
'page.html']).communicate() + self.run_browser('page.html', 'You should hear "Hello World!"') def test_vr(self): self.btest(path_from_root('tests', 'test_vr.c'), expected='0') diff --git a/tests/test_other.py b/tests/test_other.py index 7fcf26ecabb51..324288cc48410 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -22,6 +22,7 @@ def test_emcc(self): # -v, without input files output = Popen([PYTHON, compiler, '-v'], stdout=PIPE, stderr=PIPE).communicate() self.assertContained('''clang version''', output[1].replace('\r', ''), output[1].replace('\r', '')) + self.assertContained('''GNU''', output[0]) # --help output = Popen([PYTHON, compiler, '--help'], stdout=PIPE, stderr=PIPE).communicate() @@ -4418,13 +4419,6 @@ def test_emterpreter_advise(self): out, err = Popen([PYTHON, EMCC, path_from_root('tests', 'emterpreter_advise_funcptr.cpp'), '-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-s', 'EMTERPRETIFY_ADVISE=1'], stdout=PIPE).communicate() self.assertContained('-s EMTERPRETIFY_WHITELIST=\'["__Z4posti", "__Z5post2i", "__Z6middlev", "__Z7sleeperv", "__Z8recurserv", "_main"]\'', out) - self.assertNotContained('EMTERPRETIFY_YIELDLIST', out); - - out, err = Popen([PYTHON, EMCC, path_from_root('tests', 'emterpreter_advise_funcptr.cpp'), '-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-s', 'EMTERPRETIFY_ADVISE=1', '-s', 'EMTERPRETIFY_YIELDLIST=["__Z6middlev"]'], stdout=PIPE).communicate() - self.assertContained('-s EMTERPRETIFY_YIELDLIST=\'["__Z6middlev", "__Z7siblingii", "__Z7sleeperv", "__Z8recurserv", "_printf"]\'', out) - - out, err = Popen([PYTHON, EMCC, path_from_root('tests', 'emterpreter_advise_funcptr.cpp'), '-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-s', 'EMTERPRETIFY_ADVISE=1', '-s', 'EMTERPRETIFY_YIELDLIST=["__Z3pref"]'], stdout=PIPE).communicate() - self.assertContained('-s EMTERPRETIFY_YIELDLIST=\'["__Z3pref", "__Z7siblingii", "_printf"]\'', out) def test_link_with_a_static(self): for args in [[], ['-O2']]: @@ -4905,3 
+4899,17 @@ def test_debug_asmLastOpts(self): out, err = Popen([PYTHON, EMCC, 'src.c', '-s', 'EXPORTED_FUNCTIONS=["_main", "_treecount"]', '--minify', '0', '-g4', '-Oz']).communicate() self.assertContained('hello, world!', run_js('a.out.js')) + def test_meminit_crc(self): + with open('src.c', 'w') as f: + f.write(r''' +#include <stdio.h> +int main() { printf("Mary had a little lamb.\n"); } +''') + out, err = Popen([PYTHON, EMCC, 'src.c', '-O2', '--memory-init-file', '0', '-s', 'MEM_INIT_METHOD=2', '-s', 'ASSERTIONS=1']).communicate() + with open('a.out.js', 'r') as f: + d = f.read() + d = d.replace('Mary had', 'Paul had') + with open('a.out.js', 'w') as f: + f.write(d) + out = run_js('a.out.js', assert_returncode=None, stderr=subprocess.STDOUT) + self.assertContained('Assertion failed: memory initializer checksum', out) diff --git a/tests/test_sse1_full.cpp b/tests/test_sse1_full.cpp new file mode 100644 index 0000000000000..c5efdb3f57cb1 --- /dev/null +++ b/tests/test_sse1_full.cpp @@ -0,0 +1,141 @@ +// This file uses SSE1 by calling different functions with different interesting inputs and prints the results. +// Use a diff tool to compare the results between platforms.
+ +#include <xmmintrin.h> +#include "test_sse_full.h" + +int main() +{ + float *interesting_floats = get_interesting_floats(); + int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]); + assert(numInterestingFloats % 4 == 0); + + uint32_t *interesting_ints = get_interesting_ints(); + int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]); + assert(numInterestingInts % 4 == 0); + + // SSE1 Arithmetic instructions: + Ret_M128_M128(__m128, _mm_add_ps); + Ret_M128_M128(__m128, _mm_add_ss); + Ret_M128_M128(__m128, _mm_div_ps); + Ret_M128_M128(__m128, _mm_div_ss); + Ret_M128_M128(__m128, _mm_mul_ps); + Ret_M128_M128(__m128, _mm_mul_ss); + Ret_M128_M128(__m128, _mm_sub_ps); + Ret_M128_M128(__m128, _mm_sub_ss); + + // SSE1 Elementary Math functions: + Ret_M128(__m128, _mm_rcp_ps); + Ret_M128(__m128, _mm_rcp_ss); + Ret_M128(__m128, _mm_rsqrt_ps); + Ret_M128(__m128, _mm_rsqrt_ss); + Ret_M128(__m128, _mm_sqrt_ps); + Ret_M128(__m128, _mm_sqrt_ss); + + // SSE1 Logical instructions: + Ret_M128_M128(__m128, _mm_and_ps); + Ret_M128_M128(__m128, _mm_andnot_ps); + Ret_M128_M128(__m128, _mm_or_ps); + Ret_M128_M128(__m128, _mm_xor_ps); + + // SSE1 Compare instructions: + Ret_M128_M128(__m128, _mm_cmpeq_ps); + Ret_M128_M128(__m128, _mm_cmpeq_ss); + Ret_M128_M128(__m128, _mm_cmpge_ps); + Ret_M128_M128(__m128, _mm_cmpge_ss); + Ret_M128_M128(__m128, _mm_cmpgt_ps); + Ret_M128_M128(__m128, _mm_cmpgt_ss); + Ret_M128_M128(__m128, _mm_cmple_ps); + Ret_M128_M128(__m128, _mm_cmple_ss); + Ret_M128_M128(__m128, _mm_cmplt_ps); + Ret_M128_M128(__m128, _mm_cmplt_ss); + Ret_M128_M128(__m128, _mm_cmpneq_ps); + Ret_M128_M128(__m128, _mm_cmpneq_ss); + Ret_M128_M128(__m128, _mm_cmpnge_ps); + Ret_M128_M128(__m128, _mm_cmpnge_ss); + Ret_M128_M128(__m128, _mm_cmpngt_ps); + Ret_M128_M128(__m128, _mm_cmpngt_ss); + Ret_M128_M128(__m128, _mm_cmpnle_ps); + Ret_M128_M128(__m128, _mm_cmpnle_ss); + Ret_M128_M128(__m128, _mm_cmpnlt_ps); + Ret_M128_M128(__m128, _mm_cmpnlt_ss);
+ Ret_M128_M128(__m128, _mm_cmpord_ps); + Ret_M128_M128(__m128, _mm_cmpord_ss); + Ret_M128_M128(__m128, _mm_cmpunord_ps); + Ret_M128_M128(__m128, _mm_cmpunord_ss); + + Ret_M128_M128(int, _mm_comieq_ss); + Ret_M128_M128(int, _mm_comige_ss); + Ret_M128_M128(int, _mm_comigt_ss); + Ret_M128_M128(int, _mm_comile_ss); + Ret_M128_M128(int, _mm_comilt_ss); + Ret_M128_M128(int, _mm_comineq_ss); + Ret_M128_M128(int, _mm_ucomieq_ss); + Ret_M128_M128(int, _mm_ucomige_ss); + Ret_M128_M128(int, _mm_ucomigt_ss); + Ret_M128_M128(int, _mm_ucomile_ss); + Ret_M128_M128(int, _mm_ucomilt_ss); + Ret_M128_M128(int, _mm_ucomineq_ss); + + // SSE1 Convert instructions: + Ret_M128_int(__m128, _mm_cvt_si2ss); + Ret_M128(int, _mm_cvt_ss2si); + Ret_M128_int(__m128, _mm_cvtsi32_ss); + Ret_M128(float, _mm_cvtss_f32); + Ret_M128(int, _mm_cvtss_si32); + Ret_M128(int64_t, _mm_cvtss_si64); + Ret_M128(int, _mm_cvtt_ss2si); + Ret_M128(int, _mm_cvttss_si32); + Ret_M128(int64_t, _mm_cvttss_si64); + + // SSE1 Load functions: + Ret_FloatPtr(__m128, _mm_load_ps, 4, 4); + Ret_FloatPtr(__m128, _mm_load_ps1, 1, 1); + Ret_FloatPtr(__m128, _mm_load_ss, 1, 1); + Ret_FloatPtr(__m128, _mm_load1_ps, 1, 1); + Ret_M128_FloatPtr(__m128, _mm_loadh_pi, __m64*, 2, 1); + Ret_M128_FloatPtr(__m128, _mm_loadl_pi, __m64*, 2, 1); + Ret_FloatPtr(__m128, _mm_loadr_ps, 4, 4); + Ret_FloatPtr(__m128, _mm_loadu_ps, 4, 1); + + // SSE1 Miscellaneous functions: + Ret_M128(int, _mm_movemask_ps); + + // SSE1 Move functions: + Ret_M128_M128(__m128, _mm_move_ss); + Ret_M128_M128(__m128, _mm_movehl_ps); + Ret_M128_M128(__m128, _mm_movelh_ps); + +/* + // SSE1 Set functions: + _mm_set_ps + _mm_set_ps1 + _mm_set_ss + _mm_set1_ps + _mm_setr_ps + _mm_setzero_ps +*/ + + // SSE1 Special Math instructions: + Ret_M128_M128(__m128, _mm_max_ps); + Ret_M128_M128(__m128, _mm_max_ss); + Ret_M128_M128(__m128, _mm_min_ps); + Ret_M128_M128(__m128, _mm_min_ss); + + // SSE1 Store instructions: + void_OutFloatPtr_M128(_mm_store_ps, float*, 16, 16); + 
void_OutFloatPtr_M128(_mm_store_ps1, float*, 16, 16); + void_OutFloatPtr_M128(_mm_store_ss, float*, 4, 1); + void_OutFloatPtr_M128(_mm_store1_ps, float*, 16, 16); + void_OutFloatPtr_M128(_mm_storeh_pi, __m64*, 8, 1); + void_OutFloatPtr_M128(_mm_storel_pi, __m64*, 8, 1); + void_OutFloatPtr_M128(_mm_storer_ps, float*, 16, 16); + void_OutFloatPtr_M128(_mm_storeu_ps, float*, 16, 1); + void_OutFloatPtr_M128(_mm_stream_ps, float*, 16, 16); + + // SSE1 Swizzle instructions: + Ret_M128_M128_Tint(__m128, _mm_shuffle_ps); + // _MM_TRANSPOSE4_PS + Ret_M128_M128(__m128, _mm_unpackhi_ps); + Ret_M128_M128(__m128, _mm_unpacklo_ps); +} diff --git a/tests/test_sse2_full.cpp b/tests/test_sse2_full.cpp new file mode 100644 index 0000000000000..f308668a01a5c --- /dev/null +++ b/tests/test_sse2_full.cpp @@ -0,0 +1,287 @@ +// This file uses SSE2 by calling different functions with different interesting inputs and prints the results. +// Use a diff tool to compare the results between platforms. + +#include <emmintrin.h> +#define ENABLE_SSE2 +#include "test_sse_full.h" + +int main() +{ + float *interesting_floats = get_interesting_floats(); + int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]); + assert(numInterestingFloats % 4 == 0); + + uint32_t *interesting_ints = get_interesting_ints(); + int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]); + assert(numInterestingInts % 4 == 0); + + double *interesting_doubles = get_interesting_doubles(); + int numInterestingDoubles = sizeof(interesting_doubles_)/sizeof(interesting_doubles_[0]); + assert(numInterestingDoubles % 4 == 0); + + // SSE2 Arithmetic instructions: + M128i_M128i_M128i(_mm_add_epi16); + M128i_M128i_M128i(_mm_add_epi32); + M128i_M128i_M128i(_mm_add_epi64); + M128i_M128i_M128i(_mm_add_epi8); + Ret_M128d_M128d(__m128d, _mm_add_pd); + Ret_M128d_M128d(__m128d, _mm_add_sd); + + M128i_M128i_M128i(_mm_adds_epi16); + M128i_M128i_M128i(_mm_adds_epi8); + M128i_M128i_M128i(_mm_adds_epu16); +
M128i_M128i_M128i(_mm_adds_epu8); + + Ret_M128d_M128d(__m128d, _mm_div_pd); + Ret_M128d_M128d(__m128d, _mm_div_sd); + + M128i_M128i_M128i(_mm_madd_epi16); + M128i_M128i_M128i(_mm_mul_epu32); + + Ret_M128d_M128d(__m128d, _mm_mul_pd); + Ret_M128d_M128d(__m128d, _mm_mul_sd); + + M128i_M128i_M128i(_mm_mulhi_epi16); + M128i_M128i_M128i(_mm_mulhi_epu16); + M128i_M128i_M128i(_mm_mullo_epi16); + M128i_M128i_M128i(_mm_sad_epu8); + M128i_M128i_M128i(_mm_sub_epi16); + M128i_M128i_M128i(_mm_sub_epi32); + M128i_M128i_M128i(_mm_sub_epi64); + M128i_M128i_M128i(_mm_sub_epi8); + + Ret_M128d_M128d(__m128d, _mm_sub_pd); + Ret_M128d_M128d(__m128d, _mm_sub_sd); + + M128i_M128i_M128i(_mm_subs_epi16); + M128i_M128i_M128i(_mm_subs_epi8); + M128i_M128i_M128i(_mm_subs_epu16); + M128i_M128i_M128i(_mm_subs_epu8); + + // SSE2 Cast functions: + Ret_M128d(__m128, _mm_castpd_ps); + Ret_M128d(__m128i, _mm_castpd_si128); + Ret_M128(__m128d, _mm_castps_pd); + Ret_M128(__m128i, _mm_castps_si128); + Ret_M128i(__m128d, _mm_castsi128_pd); + Ret_M128i(__m128, _mm_castsi128_ps); + + // SSE2 Compare instructions: + M128i_M128i_M128i(_mm_cmpeq_epi16); + M128i_M128i_M128i(_mm_cmpeq_epi32); + M128i_M128i_M128i(_mm_cmpeq_epi8); + Ret_M128d_M128d(__m128d, _mm_cmpeq_pd); + Ret_M128d_M128d(__m128d, _mm_cmpeq_sd); + Ret_M128d_M128d(__m128d, _mm_cmpge_pd); + Ret_M128d_M128d(__m128d, _mm_cmpge_sd); + M128i_M128i_M128i(_mm_cmpgt_epi16); + M128i_M128i_M128i(_mm_cmpgt_epi32); + M128i_M128i_M128i(_mm_cmpgt_epi8); + Ret_M128d_M128d(__m128d, _mm_cmpgt_pd); + Ret_M128d_M128d(__m128d, _mm_cmpgt_sd); + Ret_M128d_M128d(__m128d, _mm_cmple_pd); + Ret_M128d_M128d(__m128d, _mm_cmple_sd); + M128i_M128i_M128i(_mm_cmplt_epi16); + M128i_M128i_M128i(_mm_cmplt_epi32); + M128i_M128i_M128i(_mm_cmplt_epi8); + Ret_M128d_M128d(__m128d, _mm_cmplt_pd); + Ret_M128d_M128d(__m128d, _mm_cmplt_sd); + Ret_M128d_M128d(__m128d, _mm_cmpneq_pd); + Ret_M128d_M128d(__m128d, _mm_cmpneq_sd); + Ret_M128d_M128d(__m128d, _mm_cmpnge_pd); + 
Ret_M128d_M128d(__m128d, _mm_cmpnge_sd); + Ret_M128d_M128d(__m128d, _mm_cmpngt_pd); + Ret_M128d_M128d(__m128d, _mm_cmpngt_sd); + Ret_M128d_M128d(__m128d, _mm_cmpnle_pd); + Ret_M128d_M128d(__m128d, _mm_cmpnle_sd); + Ret_M128d_M128d(__m128d, _mm_cmpnlt_pd); + Ret_M128d_M128d(__m128d, _mm_cmpnlt_sd); + Ret_M128d_M128d(__m128d, _mm_cmpord_pd); + Ret_M128d_M128d(__m128d, _mm_cmpord_sd); + Ret_M128d_M128d(__m128d, _mm_cmpunord_pd); + Ret_M128d_M128d(__m128d, _mm_cmpunord_sd); + + Ret_M128d_M128d(int, _mm_comieq_sd); + Ret_M128d_M128d(int, _mm_comige_sd); + Ret_M128d_M128d(int, _mm_comigt_sd); + Ret_M128d_M128d(int, _mm_comile_sd); + Ret_M128d_M128d(int, _mm_comilt_sd); + Ret_M128d_M128d(int, _mm_comineq_sd); + Ret_M128d_M128d(int, _mm_ucomieq_sd); + Ret_M128d_M128d(int, _mm_ucomige_sd); + Ret_M128d_M128d(int, _mm_ucomigt_sd); + Ret_M128d_M128d(int, _mm_ucomile_sd); + Ret_M128d_M128d(int, _mm_ucomilt_sd); + Ret_M128d_M128d(int, _mm_ucomineq_sd); + + // SSE2 Convert instructions: + Ret_M128i(__m128d, _mm_cvtepi32_pd); + Ret_M128i(__m128, _mm_cvtepi32_ps); + Ret_M128d(__m128i, _mm_cvtpd_epi32); + Ret_M128d(__m128, _mm_cvtpd_ps); + Ret_M128(__m128i, _mm_cvtps_epi32); + Ret_M128(__m128d, _mm_cvtps_pd); + Ret_M128(double, _mm_cvtsd_f64); + Ret_M128d(int, _mm_cvtsd_si32); + Ret_M128d(int64_t, _mm_cvtsd_si64); +// Ret_M128d(int64_t, _mm_cvtsd_si64x); + Ret_M128i(int, _mm_cvtsi128_si32); + Ret_M128i(int64_t, _mm_cvtsi128_si64); +// Ret_M128i(int64_t, _mm_cvtsi128_si64x); + Ret_M128d_int(__m128d, _mm_cvtsi32_sd); + Ret_int(__m128i, _mm_cvtsi32_si128); + Ret_M128d_int64(__m128d, _mm_cvtsi64_sd); + Ret_int64(__m128i, _mm_cvtsi64_si128); +// Ret_int64(__m128d, _mm_cvtsi64x_sd); +// Ret_int64(__m128i, _mm_cvtsi64x_si128); + Ret_M128d_M128d(__m128d, _mm_cvtss_sd); + Ret_M128d(__m128i, _mm_cvttpd_epi32); + Ret_M128(__m128i, _mm_cvttps_epi32); + Ret_M128d(int, _mm_cvttsd_si32); + Ret_M128d(int64_t, _mm_cvttsd_si64); +// Ret_M128d(int64_t, _mm_cvttsd_si64x); + + // SSE2 Elementary Math 
Functions instructions: + Ret_M128d(__m128d, _mm_sqrt_pd); + Ret_M128d(__m128d, _mm_sqrt_ps); + + // SSE2 General Support instructions: + /* + _mm_clflush + _mm_lfence + _mm_mfence + _mm_pause + */ + + // SSE2 Load functions: + Ret_DoublePtr(__m128d, _mm_load_pd, 2, 2); + Ret_DoublePtr(__m128d, _mm_load_pd1, 1, 1); + Ret_DoublePtr(__m128d, _mm_load_sd, 1, 1); + Ret_IntPtr(__m128i, _mm_load_si128, __m128i*, 4, 4); + Ret_DoublePtr(__m128d, _mm_load1_pd, 1, 1); + Ret_M128d_DoublePtr(__m128d, _mm_loadh_pd, double*, 1, 1); + Ret_IntPtr(__m128i, _mm_loadl_epi64, __m128i*, 2, 1); + Ret_M128d_DoublePtr(__m128d, _mm_loadl_pd, double*, 1, 1); + Ret_DoublePtr(__m128d, _mm_loadr_pd, 2, 2); + Ret_DoublePtr(__m128d, _mm_loadu_pd, 2, 1); + Ret_IntPtr(__m128i, _mm_loadu_si128, __m128i*, 2, 1); + + // SSE2 Logical instructions: + Ret_M128d_M128d(__m128d, _mm_and_pd); + M128i_M128i_M128i(_mm_and_si128); + Ret_M128d_M128d(__m128d, _mm_andnot_pd); + M128i_M128i_M128i(_mm_andnot_si128); + Ret_M128d_M128d(__m128d, _mm_or_pd); + M128i_M128i_M128i(_mm_or_si128); + Ret_M128d_M128d(__m128d, _mm_xor_pd); + M128i_M128i_M128i(_mm_xor_si128); + + // SSE2 Miscellaneous instructions: + Ret_M128i(int, _mm_movemask_epi8); + Ret_M128d(int, _mm_movemask_pd); + M128i_M128i_M128i(_mm_packs_epi16); + M128i_M128i_M128i(_mm_packs_epi32); + M128i_M128i_M128i(_mm_packus_epi16); + M128i_M128i_M128i(_mm_sad_epu8); + + // SSE2 Move instructions: + Ret_M128i(__m128i, _mm_move_epi64); + Ret_M128d_M128d(__m128d, _mm_move_sd); + + // SSE2 Probability/Statistics instructions: + M128i_M128i_M128i(_mm_avg_epu16); + M128i_M128i_M128i(_mm_avg_epu8); + +/* + // SSE2 Set functions: + _mm_set_epi16 + _mm_set_epi32 + _mm_set_epi64 + _mm_set_epi64x + _mm_set_epi8 + _mm_set_pd + _mm_set_pd1 + _mm_set_sd + _mm_set1_epi16 + _mm_set1_epi32 + _mm_set1_epi64 + _mm_set1_epi64x + _mm_set1_epi8 + _mm_set1_pd + _mm_setr_epi16 + _mm_setr_epi32 + _mm_setr_epi64 + _mm_setr_epi8 + _mm_setr_pd + _mm_setzero_pd + _mm_setzero_si128 +*/ + // 
SSE2 Shift instructions: +// Ret_M128i_Tint(__m128i, _mm_bslli_si128); +// Ret_M128i_Tint(__m128i, _mm_bsrli_si128); + M128i_M128i_M128i(_mm_sll_epi16); + M128i_M128i_M128i(_mm_sll_epi32); + M128i_M128i_M128i(_mm_sll_epi64); + Ret_M128i_Tint(__m128i, _mm_slli_epi16); + Ret_M128i_Tint(__m128i, _mm_slli_epi32); + Ret_M128i_Tint(__m128i, _mm_slli_epi64); + Ret_M128i_Tint(__m128i, _mm_slli_si128); + M128i_M128i_M128i(_mm_sra_epi16); + M128i_M128i_M128i(_mm_sra_epi32); + Ret_M128i_Tint(__m128i, _mm_srai_epi16); + Ret_M128i_Tint(__m128i, _mm_srai_epi32); + M128i_M128i_M128i(_mm_srl_epi16); + M128i_M128i_M128i(_mm_srl_epi32); + M128i_M128i_M128i(_mm_srl_epi64); + Ret_M128i_Tint(__m128i, _mm_srli_epi16); + Ret_M128i_Tint(__m128i, _mm_srli_epi32); + Ret_M128i_Tint(__m128i, _mm_srli_epi64); +// Ret_M128i_Tint(__m128i, _mm_srli_epi128); + + // SSE2 Special Math instructions: + M128i_M128i_M128i(_mm_max_epi16); + M128i_M128i_M128i(_mm_max_epu8); + Ret_M128d_M128d(__m128d, _mm_max_pd); + Ret_M128d_M128d(__m128d, _mm_max_sd); + M128i_M128i_M128i(_mm_min_epi16); + M128i_M128i_M128i(_mm_min_epu8); + Ret_M128d_M128d(__m128d, _mm_min_pd); + Ret_M128d_M128d(__m128d, _mm_min_sd); + + // SSE2 Store instructions: + void_M128i_M128i_OutIntPtr(_mm_maskmoveu_si128, char*, 16, 1); + void_OutDoublePtr_M128d(_mm_store_pd, double*, 16, 16); +// void_OutDoublePtr_M128d(_mm_store_pd1, double*, 16, 16); + void_OutDoublePtr_M128d(_mm_store_sd, double*, 8, 1); + void_OutIntPtr_M128(_mm_store_si128, __m128i*, 16, 16); + void_OutDoublePtr_M128d(_mm_store1_pd, double*, 16, 16); + void_OutDoublePtr_M128d(_mm_storeh_pd, double*, 8, 1); + void_OutIntPtr_M128(_mm_storel_epi64, __m128i*, 8, 1); + void_OutDoublePtr_M128d(_mm_storel_pd, double*, 8, 1); + void_OutDoublePtr_M128d(_mm_storer_pd, double*, 16, 16); + void_OutDoublePtr_M128d(_mm_storeu_pd, double*, 16, 1); + void_OutIntPtr_M128(_mm_storeu_si128, __m128i*, 16, 1); + void_OutDoublePtr_M128d(_mm_stream_pd, double*, 16, 16); + 
void_OutIntPtr_M128(_mm_stream_si128, __m128i*, 16, 16); + void_OutIntPtr_int(_mm_stream_si32, int*, 4, 1); + void_OutIntPtr_int64(_mm_stream_si64, int64_t*, 8, 1); + + // SSE2 Swizzle instructions: + Ret_M128i_Tint(int, _mm_extract_epi16); + Ret_M128i_int_Tint(__m128i, _mm_insert_epi16); + Ret_M128i_Tint(__m128i, _mm_shuffle_epi32); + Ret_M128d_M128d_Tint(__m128d, _mm_shuffle_pd); + Ret_M128i_Tint(__m128i, _mm_shufflehi_epi16); + Ret_M128i_Tint(__m128i, _mm_shufflelo_epi16); + + M128i_M128i_M128i(_mm_unpackhi_epi16); + M128i_M128i_M128i(_mm_unpackhi_epi32); + M128i_M128i_M128i(_mm_unpackhi_epi64); + M128i_M128i_M128i(_mm_unpackhi_epi8); + Ret_M128d_M128d(__m128d, _mm_unpackhi_pd); + M128i_M128i_M128i(_mm_unpacklo_epi16); + M128i_M128i_M128i(_mm_unpacklo_epi32); + M128i_M128i_M128i(_mm_unpacklo_epi64); + M128i_M128i_M128i(_mm_unpacklo_epi8); + Ret_M128d_M128d(__m128d, _mm_unpacklo_pd); +} diff --git a/tests/test_sse_full.h b/tests/test_sse_full.h new file mode 100644 index 0000000000000..ba03509b13618 --- /dev/null +++ b/tests/test_sse_full.h @@ -0,0 +1,575 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +// Recasts floating point representation of f to an integer. +uint32_t fcastu(float f) { return *(uint32_t*)&f; } +uint64_t dcastu(double f) { return *(uint64_t*)&f; } +float ucastf(uint32_t t) { return *(float*)&t; } +double ucastd(uint64_t t) { return *(double*)&t; } + +// Data used in test. Store them global and access via a getter to confuse optimizer to not "solve" the whole test suite at compile-time, +// so that the operation will actually be performed at runtime, and not at compile-time. 
(Testing the capacity of the compiler to perform +// SIMD ops at compile-time would be interesting as well, but that's for another test) +float interesting_floats_[] = { -INFINITY, -FLT_MAX, -2.5f, -1.5f, -1.4f, -1.0f, -0.5f, -0.2f, -FLT_MIN, -0.f, 0.f, + 1.401298464e-45f, FLT_MIN, 0.3f, 0.5f, 0.8f, 1.0f, 1.5f, 2.5f, 3.5f, 3.6f, FLT_MAX, INFINITY, NAN, + ucastf(0x01020304), ucastf(0x80000000), ucastf(0x7FFFFFFF), ucastf(0xFFFFFFFF) }; + +double interesting_doubles_[] = { -INFINITY, -FLT_MAX, -2.5, -1.5, -1.4, -1.0, -0.5, -0.2, -FLT_MIN, -0.0, 0.0, + 1.401298464e-45, FLT_MIN, 0.3, 0.5, 0.8, 1.0, 1.5, 2.5, 3.5, 3.6, FLT_MAX, INFINITY, NAN, + ucastd(0x0102030405060708ULL), ucastd(0x8000000000000000ULL), ucastd(0x7FFFFFFFFFFFFFFFULL), ucastd(0xFFFFFFFFFFFFFFFFULL) }; + +uint32_t interesting_ints_[] = { 0, 1, 2, 3, 0x01020304, 0x10203040, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x12345678, 0x9ABCDEF1, 0x80000000, + 0x80808080, 0x7F7F7F7F, 0x01010101, 0x11111111, 0x20202020, 0x0F0F0F0F, 0xF0F0F0F0, + fcastu(-INFINITY), fcastu(-FLT_MAX), fcastu(-2.5f), fcastu(-1.5f), fcastu(-1.4f), fcastu(-1.0f), fcastu(-0.5f), + fcastu(-0.2f), fcastu(-FLT_MIN), 0xF9301AB9, 0x0039AB12, 0x19302BCD, + fcastu(1.401298464e-45f), fcastu(FLT_MIN), fcastu(0.3f), fcastu(0.5f), fcastu(0.8f), fcastu(1.0f), fcastu(1.5f), + fcastu(2.5f), fcastu(3.5f), fcastu(3.6f), fcastu(FLT_MAX), fcastu(INFINITY), fcastu(NAN) }; + +bool always_true() { return time(NULL) != 0; } // This function always returns true, but the compiler should not know this. 
+ +bool IsNan(float f) { return (fcastu(f) << 1) > 0xFF000000u; } + +char *SerializeFloat(float f, char *dstStr) +{ + if (!IsNan(f)) + { + int numChars = sprintf(dstStr, "%.9g", f); + return dstStr + numChars; + } + else + { + uint32_t u = fcastu(f); + int numChars = sprintf(dstStr, "NaN(0x%8X)", (unsigned int)u); + return dstStr + numChars; + } +} + +char *SerializeDouble(double f, char *dstStr) +{ + if (!IsNan(f)) + { + int numChars = sprintf(dstStr, "%.17g", f); + return dstStr + numChars; + } + else + { + uint64_t u = dcastu(f); + int numChars = sprintf(dstStr, "NaN(0x%08X%08X)", (unsigned int)(u>>32), (unsigned int)u); + return dstStr + numChars; + } +} + +void tostr(__m128 *m, char *outstr) +{ + union { __m128 m; float val[4]; } u; + u.m = *m; + char s[4][32]; + SerializeFloat(u.val[0], s[0]); + SerializeFloat(u.val[1], s[1]); + SerializeFloat(u.val[2], s[2]); + SerializeFloat(u.val[3], s[3]); + sprintf(outstr, "[%s,%s,%s,%s]", s[3], s[2], s[1], s[0]); +} + +#ifdef ENABLE_SSE2 + +void tostr(__m128i *m, char *outstr) +{ + union { __m128i m; uint32_t val[4]; } u; + u.m = *m; + sprintf(outstr, "[0x%08X,0x%08X,0x%08X,0x%08X]", u.val[3], u.val[2], u.val[1], u.val[0]); +} + +void tostr(__m128d *m, char *outstr) +{ + union { __m128d m; double val[2]; } u; + u.m = *m; + char s[2][64]; + SerializeDouble(u.val[0], s[0]); + SerializeDouble(u.val[1], s[1]); + sprintf(outstr, "[%s,%s]", s[1], s[0]); +} + +__m128i ExtractInRandomOrder(uint32_t *arr, int i, int n, int prime) +{ + return _mm_set_epi32(arr[(i*prime)%n], arr[((i+1)*prime)%n], arr[((i+2)*prime)%n], arr[((i+3)*prime)%n]); +} + +__m128d ExtractInRandomOrder(double *arr, int i, int n, int prime) +{ + return _mm_set_pd(arr[(i*prime)%n], arr[((i+1)*prime)%n]); +} +#endif + +void tostr(int *m, char *outstr) +{ + sprintf(outstr, "0x%08X", *m); +} + +void tostr(int64_t *m, char *outstr) +{ + sprintf(outstr, "0x%08X%08X", (int)(*m >> 32), (int)*m); +} + +void tostr(float *m, char *outstr) +{ + SerializeFloat(*m, 
outstr); +} + +void tostr(double *m, char *outstr) +{ + SerializeDouble(*m, outstr); +} + +void tostr(double *m, int numElems, char *outstr) +{ + char s[2][64]; + for(int i = 0; i < numElems; ++i) + SerializeDouble(m[i], s[i]); + switch(numElems) + { + case 1: sprintf(outstr, "{%s}", s[0]); break; + case 2: sprintf(outstr, "{%s,%s}", s[0], s[1]); break; + } +} + +void tostr(float *m, int numElems, char *outstr) +{ + char s[4][64]; + for(int i = 0; i < numElems; ++i) + SerializeFloat(m[i], s[i]); + switch(numElems) + { + case 1: sprintf(outstr, "{%s}", s[0]); break; + case 2: sprintf(outstr, "{%s,%s}", s[0], s[1]); break; + case 3: sprintf(outstr, "{%s,%s,%s}", s[0], s[1], s[2]); break; + case 4: sprintf(outstr, "{%s,%s,%s,%s}", s[0], s[1], s[2], s[3]); break; + } +} + +void tostr(int *s, int numElems, char *outstr) +{ + switch(numElems) + { + case 1: sprintf(outstr, "{0x%08X}", s[0]); break; + case 2: sprintf(outstr, "{0x%08X,0x%08X}", s[0], s[1]); break; + case 3: sprintf(outstr, "{0x%08X,0x%08X,0x%08X}", s[0], s[1], s[2]); break; + case 4: sprintf(outstr, "{0x%08X,0x%08X,0x%08X,0x%08X}", s[0], s[1], s[2], s[3]); break; + } +} + +void tostr(int64_t *m, int numElems, char *outstr) +{ + switch(numElems) + { + case 1: sprintf(outstr, "{0x%08X%08X}", (int)(*m >> 32), (int)*m); break; + case 2: sprintf(outstr, "{0x%08X%08X,0x%08X%08X}", (int)(*m >> 32), (int)*m, (int)(m[1] >> 32), (int)m[1]); + } +} + +// Accessors to the test data in a way that the compiler can't optimize at compile-time. +__attribute__((noinline)) float *get_interesting_floats() +{ + return always_true() ? interesting_floats_ : 0; +} + +__attribute__((noinline)) uint32_t *get_interesting_ints() +{ + return always_true() ? interesting_ints_ : 0; +} + +__attribute__((noinline)) double *get_interesting_doubles() +{ + return always_true() ? 
interesting_doubles_ : 0; +} + +__m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) +{ + return _mm_set_ps(arr[(i*prime)%n], arr[((i+1)*prime)%n], arr[((i+2)*prime)%n], arr[((i+3)*prime)%n]); +} + +#define E1(arr, i, n) ExtractInRandomOrder(arr, i, n, 1) +#define E2(arr, i, n) ExtractInRandomOrder(arr, i, n, 1787) + +#define M128i_M128i_M128i(func) \ + for(int i = 0; i < numInterestingInts / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; j < numInterestingInts / 4; ++j) \ + { \ + __m128i m1 = E1(interesting_ints, i*4+k, numInterestingInts); \ + __m128i m2 = E2(interesting_ints, j*4, numInterestingInts); \ + __m128i ret = func(m1, m2); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + +#define Ret_M128i_Tint_body(Ret_type, func, Tint) \ + for(int i = 0; i < numInterestingInts / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + { \ + __m128i m1 = E1(interesting_ints, i*4+k, numInterestingInts); \ + Ret_type ret = func(m1, Tint); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s, %d) = %s\n", #func, str, Tint, str2); \ + } + +#define Ret_M128i_int_Tint_body(Ret_type, func, Tint) \ + for(int i = 0; i < numInterestingInts / 4; ++i) \ + for(int j = 0; j < numInterestingInts; ++j) \ + for(int k = 0; k < 4; ++k) \ + { \ + __m128i m1 = E1(interesting_ints, i*4+k, numInterestingInts); \ + Ret_type ret = func(m1, interesting_ints[j], Tint); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s, 0x%08X, %d) = %s\n", #func, str, interesting_ints[j], Tint, str2); \ + } + +#define Ret_M128d_M128d_Tint_body(Ret_type, func, Tint) \ + for(int i = 0; i < numInterestingDoubles / 2; ++i) \ + for(int k = 0; k < 2; ++k) \ + for(int j = 0; j < numInterestingDoubles / 2; ++j) \ + { \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + __m128d m2 
= E2(interesting_doubles, j*2, numInterestingDoubles); \ + Ret_type ret = func(m1, m2, Tint); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s, %d) = %s\n", #func, str, str2, Tint, str3); \ + } + +#define Ret_M128_M128_Tint_body(Ret_type, func, Tint) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; j < numInterestingFloats / 4; ++j) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + __m128 m2 = E2(interesting_floats, j*4, numInterestingFloats); \ + Ret_type ret = func(m1, m2, Tint); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s, %d) = %s\n", #func, str, str2, Tint, str3); \ + } + +#define const_int8_unroll(Ret_type, F, func) \ + F(Ret_type, func, -1); \ + F(Ret_type, func, 0); \ + F(Ret_type, func, 1); \ + F(Ret_type, func, 2); \ + F(Ret_type, func, 3); \ + F(Ret_type, func, 5); \ + F(Ret_type, func, 7); \ + F(Ret_type, func, 11); \ + F(Ret_type, func, 13); \ + F(Ret_type, func, 15); \ + F(Ret_type, func, 16); \ + F(Ret_type, func, 17); \ + F(Ret_type, func, 23); \ + F(Ret_type, func, 29); \ + F(Ret_type, func, 31); \ + F(Ret_type, func, 37); \ + F(Ret_type, func, 43); \ + F(Ret_type, func, 47); \ + F(Ret_type, func, 59); \ + F(Ret_type, func, 127); \ + F(Ret_type, func, 128); \ + F(Ret_type, func, 191); \ + F(Ret_type, func, 254); \ + F(Ret_type, func, 255); \ + F(Ret_type, func, 309); + +#define Ret_M128i_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128i_Tint_body, func) +#define Ret_M128i_int_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128i_int_Tint_body, func) +#define Ret_M128d_M128d_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128d_M128d_Tint_body, func) +#define Ret_M128_M128_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128_M128_Tint_body, func) + +#define 
Ret_M128d_M128d(Ret_type, func) \ + for(int i = 0; i < numInterestingDoubles / 2; ++i) \ + for(int k = 0; k < 2; ++k) \ + for(int j = 0; j < numInterestingDoubles / 2; ++j) \ + { \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + __m128d m2 = E2(interesting_doubles, j*2, numInterestingDoubles); \ + Ret_type ret = func(m1, m2); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + +#define Ret_M128d_int(Ret_type, func) \ + for(int i = 0; i < numInterestingDoubles / 2; ++i) \ + for(int k = 0; k < 2; ++k) \ + for(int j = 0; j < numInterestingInts; ++j) \ + { \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + int m2 = interesting_ints[j]; \ + Ret_type ret = func(m1, m2); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + +#define Ret_M128d_int64(Ret_type, func) \ + for(int i = 0; i < numInterestingDoubles / 2; ++i) \ + for(int k = 0; k < 2; ++k) \ + for(int j = 0; j < numInterestingInts; ++j) \ + for(int l = 0; l < numInterestingInts; ++l) \ + { \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + int64_t m2 = (int64_t)(((uint64_t)interesting_ints[j]) << 32 | (uint64_t)interesting_ints[l]); \ + Ret_type ret = func(m1, m2); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + +#define Ret_M128d(Ret_type, func) \ + for(int i = 0; i < numInterestingDoubles / 2; ++i) \ + for(int k = 0; k < 2; ++k) \ + { \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + Ret_type ret = func(m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + 
+#define Ret_DoublePtr(Ret_type, func, numElemsAccessed, inc) \ + for(int i = 0; i+numElemsAccessed <= numInterestingDoubles; i += inc) \ + { \ + double *ptr = interesting_doubles + i; \ + Ret_type ret = func(ptr); \ + char str[256]; tostr(ptr, numElemsAccessed, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +float tempOutFloatStore[16]; +float *getTempOutFloatStore(int alignmentBytes) +{ + uintptr_t addr = (uintptr_t)tempOutFloatStore; + addr = (addr + alignmentBytes - 1) & ~(alignmentBytes-1); + return (float*)addr; +} + +int *getTempOutIntStore(int alignmentBytes) { return (int*)getTempOutFloatStore(alignmentBytes); } +double *getTempOutDoubleStore(int alignmentBytes) { return (double*)getTempOutFloatStore(alignmentBytes); } + +#define void_OutFloatPtr_M128(func, Ptr_type, numBytesWritten, alignmentBytes) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int offset = 0; offset < numBytesWritten; offset += alignmentBytes) \ + for(int k = 0; k < 4; ++k) \ + { \ + uintptr_t base = (uintptr_t)getTempOutFloatStore(16); \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + float *out = (float*)(base + offset); \ + func((Ptr_type)out, m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(out, numBytesWritten/sizeof(float), str2); \ + printf("%s(p:align=%d, %s) = %s\n", #func, offset, str, str2); \ + } + +#define void_OutDoublePtr_M128d(func, Ptr_type, numBytesWritten, alignmentBytes) \ + for(int i = 0; i < numInterestingDoubles / 2; ++i) \ + for(int offset = 0; offset < numBytesWritten; offset += alignmentBytes) \ + for(int k = 0; k < 2; ++k) \ + { \ + uintptr_t base = (uintptr_t)getTempOutDoubleStore(16); \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + double *out = (double*)(base + offset); \ + func((Ptr_type)out, m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(out, numBytesWritten/sizeof(double), str2); \ + 
printf("%s(p:align=%d, %s) = %s\n", #func, offset, str, str2); \ + } + +#define void_OutIntPtr_M128(func, Ptr_type, numBytesWritten, alignmentBytes) \ + for(int i = 0; i < numInterestingInts / 4; ++i) \ + for(int offset = 0; offset < numBytesWritten; offset += alignmentBytes) \ + for(int k = 0; k < 4; ++k) \ + { \ + uintptr_t base = (uintptr_t)getTempOutIntStore(16); \ + __m128 m1 = E1(interesting_ints, i*4+k, numInterestingInts); \ + int *out = (int*)(base + offset); \ + func((Ptr_type)out, m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(out, numBytesWritten/sizeof(int), str2); \ + printf("%s(p:align=%d, %s) = %s\n", #func, offset, str, str2); \ + } + +#define void_OutIntPtr_int(func, Ptr_type, numBytesWritten, alignmentBytes) \ + for(int i = 0; i < numInterestingInts; ++i) \ + for(int offset = 0; offset < numBytesWritten; offset += alignmentBytes) \ + for(int k = 0; k < 4; ++k) \ + { \ + uintptr_t base = (uintptr_t)getTempOutIntStore(16); \ + int m1 = interesting_ints[i]; \ + int *out = (int*)(base + offset); \ + func((Ptr_type)out, m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(out, numBytesWritten/sizeof(int), str2); \ + printf("%s(p:align=%d, %s) = %s\n", #func, offset, str, str2); \ + } + +#define void_OutIntPtr_int64(func, Ptr_type, numBytesWritten, alignmentBytes) \ + for(int i = 0; i < numInterestingInts; ++i) \ + for(int j = 0; j < numInterestingInts; ++j) \ + for(int offset = 0; offset < numBytesWritten; offset += alignmentBytes) \ + { \ + uintptr_t base = (uintptr_t)getTempOutIntStore(16); \ + int64_t m1 = (int64_t)(((uint64_t)interesting_ints[i]) << 32 | (uint64_t)interesting_ints[j]); \ + int64_t *out = (int64_t*)(base + offset); \ + func((Ptr_type)out, m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(out, numBytesWritten/sizeof(int64_t), str2); \ + printf("%s(p:align=%d, %s) = %s\n", #func, offset, str, str2); \ + } + +#define void_M128i_M128i_OutIntPtr(func, Ptr_type, numBytesWritten, 
alignmentBytes) \ + for(int i = 0; i < numInterestingInts / 4; ++i) \ + for(int j = 0; j < numInterestingInts / 4; ++j) \ + for(int offset = 0; offset < numBytesWritten; offset += alignmentBytes) \ + for(int k = 0; k < 4; ++k) \ + { \ + uintptr_t base = (uintptr_t)getTempOutIntStore(16); \ + __m128d m1 = E1(interesting_ints, i*4+k, numInterestingInts); \ + __m128i m2 = E2(interesting_ints, j*4, numInterestingInts); \ + int *out = (int*)(base + offset); \ + func(m1, m2, (Ptr_type)out); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(out, numBytesWritten/sizeof(int), str3); \ + printf("%s(%s, %s, p:align=%d) = %s\n", #func, str, str2, offset, str3); \ + } + +#define Ret_M128(Ret_type, func) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + Ret_type ret = func(m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define Ret_FloatPtr(Ret_type, func, numElemsAccessed, inc) \ + for(int i = 0; i+numElemsAccessed <= numInterestingFloats; i += inc) \ + { \ + float *ptr = interesting_floats + i; \ + Ret_type ret = func(ptr); \ + char str[256]; tostr(ptr, numElemsAccessed, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define Ret_IntPtr(Ret_type, func, Ptr_type, numElemsAccessed, inc) \ + for(int i = 0; i+numElemsAccessed <= numInterestingInts; i += inc) \ + { \ + uint32_t *ptr = interesting_ints + i; \ + Ret_type ret = func((Ptr_type)ptr); \ + char str[256]; tostr((int*)ptr, numElemsAccessed, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define Ret_M128_FloatPtr(Ret_type, func, Ptr_type, numElemsAccessed, inc) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; 
j+numElemsAccessed <= numInterestingFloats; j += inc) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + float *ptr = interesting_floats + j; \ + Ret_type ret = func(m1, (Ptr_type)ptr); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(ptr, numElemsAccessed, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + +#define Ret_M128d_DoublePtr(Ret_type, func, Ptr_type, numElemsAccessed, inc) \ + for(int i = 0; i < numInterestingDoubles / 2; ++i) \ + for(int k = 0; k < 2; ++k) \ + for(int j = 0; j+numElemsAccessed <= numInterestingDoubles; j += inc) \ + { \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + double *ptr = interesting_doubles + j; \ + Ret_type ret = func(m1, (Ptr_type)ptr); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(ptr, numElemsAccessed, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + +#define Ret_M128i(Ret_type, func) \ + for(int i = 0; i < numInterestingInts / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + { \ + __m128i m1 = E1(interesting_ints, i*4+k, numInterestingInts); \ + Ret_type ret = func(m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define Ret_int(Ret_type, func) \ + for(int i = 0; i < numInterestingInts; ++i) \ + { \ + Ret_type ret = func(interesting_ints[i]); \ + char str[256]; tostr((int*)&interesting_ints[i], str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define Ret_int64(Ret_type, func) \ + for(int i = 0; i < numInterestingInts; ++i) \ + for(int j = 0; j < numInterestingInts; ++j) \ + { \ + int64_t m1 = (int64_t)(((uint64_t)interesting_ints[i]) << 32 | (uint64_t)interesting_ints[j]); \ + Ret_type ret = func(m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&ret, str2); \ + 
printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define Ret_M128_M128(Ret_type, func) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; j < numInterestingFloats / 4; ++j) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + __m128 m2 = E2(interesting_floats, j*4, numInterestingFloats); \ + Ret_type ret = func(m1, m2); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + +#define Ret_M128_int(Ret_type, func) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; j < numInterestingInts; ++j) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + int m2 = interesting_ints[j]; \ + Ret_type ret = func(m1, m2); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } diff --git a/tools/emterpretify.py b/tools/emterpretify.py index e713c2e1a805e..cf8667f89daf8 100755 --- a/tools/emterpretify.py +++ b/tools/emterpretify.py @@ -57,7 +57,6 @@ def handle_arg(arg): BLACKLIST = set(['_malloc', '_free', '_memcpy', '_memmove', '_memset', 'copyTempDouble', 'copyTempFloat', '_strlen', 'stackAlloc', 'setThrew', 'stackRestore', 'setTempRet0', 'getTempRet0', 'stackSave', 'runPostSets', '_emscripten_autodebug_double', '_emscripten_autodebug_float', '_emscripten_autodebug_i8', '_emscripten_autodebug_i16', '_emscripten_autodebug_i32', '_emscripten_autodebug_i64', '_strncpy', '_strcpy', '_strcat', '_saveSetjmp', '_testSetjmp', '_emscripten_replace_memory', '_bitshift64Shl', '_bitshift64Ashr', '_bitshift64Lshr', 'setAsyncState', 'emtStackSave']) WHITELIST = [] -YIELDLIST = ['stackSave', 'stackRestore', 'stackAlloc', 'setThrew', '_memset'] # functions which are ok to run while doing a 
sleep_with_yield. SYNC_FUNCS = set(['_emscripten_sleep', '_emscripten_sleep_with_yield', '_emscripten_wget_data', '_emscripten_idb_load', '_emscripten_idb_store', '_emscripten_idb_delete']) @@ -703,8 +702,6 @@ def process(code): infile = sys.argv[1] outfile = sys.argv[2] - original_yieldlist = YIELDLIST - extra_blacklist = [] if len(sys.argv) >= 4: temp = sys.argv[3] @@ -723,13 +720,6 @@ def process(code): WHITELIST = json.loads(temp) if len(sys.argv) >= 6: - temp = sys.argv[5] - if temp[0] == '"': - # response file - assert temp[1] == '@' - temp = open(temp[2:-1]).read() - YIELDLIST = YIELDLIST + json.loads(temp) - if len(sys.argv) >= 7: SWAPPABLE = int(sys.argv[6]) @@ -774,23 +764,6 @@ def process(code): print "Suggested list of functions to run in the emterpreter:" print " -s EMTERPRETIFY_WHITELIST='" + str(sorted(list(advised))).replace("'", '"') + "'" print "(%d%% out of %d functions)" % (int((100.0*len(advised))/len(can_call)), len(can_call)) - if len(YIELDLIST) > len(original_yieldlist): - # advise on the yield list as well. 
Anything a yield function can reach, likely needs to also be a yield function - YIELD_IGNORE = set(['abort']) - to_check = list(YIELDLIST) - advised = set([str(f) for f in YIELDLIST]) - while len(to_check) > 0: - curr = to_check.pop() - if curr not in can_call: continue - for next in can_call[curr]: - if next not in advised: - advised.add(str(next)) - to_check.append(next) - advised = [next for next in advised if not is_dyn_call(next) and not is_function_table(next) and not next in original_yieldlist and next not in SYNC_FUNCS and next not in YIELD_IGNORE and next[0] == '_'] - print - print "Suggested list of yield functions for the emterpreter:" - print " -s EMTERPRETIFY_YIELDLIST='" + str(sorted(list(advised))).replace("'", '"') + "'" - print "(%d%% out of %d functions)" % (int((100.0*len(advised))/len(can_call)), len(can_call)) sys.exit(0) BLACKLIST = set(list(BLACKLIST) + extra_blacklist) @@ -848,7 +821,7 @@ def process(code): external_emterpreted_funcs = filter(lambda func: func in tabled_funcs or func in exported_funcs or func in reachable_funcs, emterpreted_funcs) # process functions, generating bytecode - shared.Building.js_optimizer(infile, ['emterpretify'], extra_info={ 'emterpretedFuncs': list(emterpreted_funcs), 'externalEmterpretedFuncs': list(external_emterpreted_funcs), 'opcodes': OPCODES, 'ropcodes': ROPCODES, 'ASYNC': ASYNC, 'PROFILING': PROFILING, 'ASSERTIONS': ASSERTIONS, 'yieldFuncs': YIELDLIST }, output_filename=temp, just_concat=True) + shared.Building.js_optimizer(infile, ['emterpretify'], extra_info={ 'emterpretedFuncs': list(emterpreted_funcs), 'externalEmterpretedFuncs': list(external_emterpreted_funcs), 'opcodes': OPCODES, 'ropcodes': ROPCODES, 'ASYNC': ASYNC, 'PROFILING': PROFILING, 'ASSERTIONS': ASSERTIONS }, output_filename=temp, just_concat=True) # load the module and modify it asm = asm_module.AsmModule(temp) diff --git a/tools/file_packager.py b/tools/file_packager.py index 555bddcb2dc2d..168f019edf325 100644 --- 
a/tools/file_packager.py +++ b/tools/file_packager.py @@ -499,8 +499,9 @@ def was_seen(name): # Get the big archive and split it up if no_heap_copy: use_data = ''' - // copy the entire loaded file into a spot in the heap. Files will refer to slices in that. They cannot be freed though. - var ptr = Module['_malloc'](byteArray.length); + // copy the entire loaded file into a spot in the heap. Files will refer to slices in that. They cannot be freed though + // (we may be allocating before malloc is ready, during startup). + var ptr = Module['getMemory'](byteArray.length); Module['HEAPU8'].set(byteArray, ptr); DataRequest.prototype.byteArray = Module['HEAPU8'].subarray(ptr, ptr+byteArray.length); ''' diff --git a/tools/js-optimizer.js b/tools/js-optimizer.js index f3b83637f4c03..3afa5027fe85d 100644 --- a/tools/js-optimizer.js +++ b/tools/js-optimizer.js @@ -5754,7 +5754,6 @@ function emterpretify(ast) { var ASYNC = extraInfo.ASYNC; var PROFILING = extraInfo.PROFILING; var ASSERTIONS = extraInfo.ASSERTIONS; - var yieldFuncs = set(extraInfo.yieldFuncs); var RELATIVE_BRANCHES = set('BR', 'BRT', 'BRF'); var ABSOLUTE_BRANCHES = set('BRA', 'BRTA', 'BRFA'); @@ -5782,6 +5781,14 @@ function emterpretify(ast) { return Array.prototype.slice.call(tempUint8, 0, 8); } + var OK_TO_CALL_WHILE_ASYNC = set('stackSave', 'stackRestore', 'stackAlloc', 'setThrew', '_memset'); // functions which are ok to run while async, even if not emterpreted + function okToCallWhileAsync(name) { + // dynCall *can* be on the stack, they are just bridges; what matters is where they go + if (/^dynCall_/.test(name)) return true; + if (name in OK_TO_CALL_WHILE_ASYNC) return true; + return false; + } + function verifyCode(code, stat) { if (code.length % 4 !== 0) assert(0, JSON.stringify(code)); var len = code.length; @@ -7037,12 +7044,9 @@ function emterpretify(ast) { if (ignore) { // we are not emterpreting this function - if (ASYNC && ASSERTIONS && !/^dynCall_/.test(func[1]) && !(func[1] in yieldFuncs)) { 
+ if (ASYNC && ASSERTIONS && !okToCallWhileAsync(func[1])) { // we need to be careful to never enter non-emterpreted code while doing an async save/restore, // which is what happens if non-emterpreted code is on the stack while we attempt to save. - // note that we special-case dynCall, which *can* be on the stack, they are just bridges; what - // matters is where they go - // add asserts right after each call var stack = []; traverse(func, function(node, type) { @@ -7251,10 +7255,6 @@ function emterpretify(ast) { }); if (ASYNC) { argStats.push(['if', srcToExp('(asyncState|0) == 1'), srcToStat('asyncState = 3;')]); // we know we are during a sleep, mark the state - if (ASSERTIONS && !(func[1] in yieldFuncs)) { - argStats.push(['if', srcToExp('((asyncState|0) == 1) | ((asyncState|0) == 3)'), srcToStat('abort(-12) | 0')]); // if *not* a yield func, we should never get here (trampoline entry) - // while sleeping (3, or 1 which has not yet been turned into a 3) - } argStats = [['if', srcToExp('(asyncState|0) != 2'), ['block', argStats]]]; // 2 means restore, so do not trample the stack } func[3] = func[3].concat(argStats); diff --git a/tools/shared.py b/tools/shared.py index 0ed58048437d4..0c1bcc4cd8ac0 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -1810,6 +1810,30 @@ def optimize_initializer(src): if len(contents) <= JS.INITIALIZER_CHUNK_SIZE: return None return JS.replace_initializers(src, JS.split_initializer(contents)) + @staticmethod + def generate_string_initializer(s): + if Settings.ASSERTIONS: + # append checksum of length and content + crcTable = [] + for i in range(256): + crc = i + for bit in range(8): + crc = (crc >> 1) ^ ((crc & 1) * 0xedb88320) + crcTable.append(crc) + crc = 0xffffffff + n = len(s) + crc = crcTable[(crc ^ n) & 0xff] ^ (crc >> 8) + crc = crcTable[(crc ^ (n >> 8)) & 0xff] ^ (crc >> 8) + for i in s: + crc = crcTable[(crc ^ i) & 0xff] ^ (crc >> 8) + for i in range(4): + s.append((crc >> (8 * i)) & 0xff) + s = ''.join(map(chr, s)) + s 
= s.replace('\\', '\\\\').replace("'", "\\'") + s = s.replace('\n', '\\n').replace('\r', '\\r') + def escape(x): return '\\x{:02x}'.format(ord(x.group())) + return re.sub('[\x80-\xff]', escape, s) + # Compression of code and data for smaller downloads class Compression: on = False