From edd7ac14ffcb86b5edb88266db08c356f863807c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 10 Jun 2015 00:58:58 +0300 Subject: [PATCH 01/32] Add full test of SSE1 API, currently disabled due to same reasons as test_sse1. --- tests/test_core.py | 14 ++- tests/test_sse1_full.c | 255 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 268 insertions(+), 1 deletion(-) create mode 100644 tests/test_sse1_full.c diff --git a/tests/test_core.py b/tests/test_core.py index be17f34be11d5..6baddd3f9ce03 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -5491,7 +5491,6 @@ def test(): self.emcc_args += ['-s', 'EMTERPRETIFY_WHITELIST=["_frexpl"]'] # test double call assertions test() - # Tests the full SSE1 API. def test_sse1(self): return self.skip('TODO: This test fails due to bugs #2840, #3044, #3045, #3046 and #3048 (also see #3043 and #3049)') Settings.PRECISE_F32 = 1 # SIMD currently requires Math.fround @@ -5501,6 +5500,19 @@ def test_sse1(self): self.emcc_args = orig_args + mode self.do_run(open(path_from_root('tests', 'test_sse1.cpp'), 'r').read(), 'Success!') + # Tests the full SSE1 API. 
+ def test_sse1_full(self): + return self.skip('TODO: This test fails due to bugs #2840, #3044, #3045, #3046 and #3048 (also see #3043 and #3049)') + if SPIDERMONKEY_ENGINE not in JS_ENGINES: return self.skip('test_sse1_full requires SpiderMonkey to run.') + Popen([CLANG, path_from_root('tests', 'test_sse1_full.c'), '-o', 'test_sse1_full'] + get_clang_native_args(), stdout=PIPE, stderr=PIPE).communicate() + native_result, err = Popen('./test_sse1_full', stdout=PIPE, stderr=PIPE).communicate() + + Settings.PRECISE_F32 = 1 # SIMD currently requires Math.fround + orig_args = self.emcc_args + for mode in [[], ['-s', 'SIMD=1']]: + self.emcc_args = orig_args + mode + self.do_run(open(path_from_root('tests', 'test_sse1_full.c'), 'r').read(), native_result) + def test_simd(self): if self.is_emterpreter(): return self.skip('todo') diff --git a/tests/test_sse1_full.c b/tests/test_sse1_full.c new file mode 100644 index 0000000000000..71e402dda46de --- /dev/null +++ b/tests/test_sse1_full.c @@ -0,0 +1,255 @@ +// This file uses SSE1 by calling different functions with different interesting inputs and prints the results. +// Use a diff tool to compare the results between platforms. + +#include +#include +#include +#include +#include +#include +#include + +// Recasts floating point representation of f to an integer. +uint32_t fcastu(float f) { return *(uint32_t*)&f; } +float ucastf(uint32_t t) { return *(float*)&t; } + +// Data used in test. Store them global and access via a getter to confuse optimizer to not "solve" the whole test suite at compile-time, +// so that the operation will actually be performed at runtime, and not at compile-time. 
(Testing the capacity of the compiler to perform +// SIMD ops at compile-time would be interesting as well, but that's for another test) +float interesting_floats_[] = { -INFINITY, -FLT_MAX, -2.5f, -1.5f, -1.4f, -1.0f, -0.5f, -0.2f, -FLT_MIN, -0.f, 0.f, + 1.401298464e-45f, FLT_MIN, 0.3f, 0.5f, 0.8f, 1.0f, 1.5f, 2.5f, 3.5f, 3.6f, FLT_MAX, INFINITY, NAN, + ucastf(0x01020304), ucastf(0x80000000), ucastf(0x7FFFFFFF), ucastf(0xFFFFFFFF) }; + +bool always_true() { return time(NULL) != 0; } // This function always returns true, but the compiler should not know this. + +bool IsNan(float f) { return (fcastu(f) << 1) > 0xFF000000u; } + +char *SerializeFloat(float f, char *dstStr) +{ + if (!IsNan(f)) + { + int numChars = sprintf(dstStr, "%.9g", f); + return dstStr + numChars; + } + else + { + uint32_t u = fcastu(f); + int numChars = sprintf(dstStr, "NaN(%8X)", (unsigned int)u); + return dstStr + numChars; + } +} + +void tostr(__m128 *m, char *outstr) +{ + union { __m128 m; float val[4]; } u; + u.m = *m; + char s[4][32]; + SerializeFloat(u.val[0], s[0]); + SerializeFloat(u.val[1], s[1]); + SerializeFloat(u.val[2], s[2]); + SerializeFloat(u.val[3], s[3]); + sprintf(outstr, "[%s,%s,%s,%s]", s[3], s[2], s[1], s[0]); +} + +// Accessors to the test data in a way that the compiler can't optimize at compile-time. +__attribute__((noinline)) float *get_arr() +{ + return always_true() ? 
interesting_floats_ : 0; +} + +__m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) +{ + return _mm_set_ps(arr[(i*prime)%n], arr[((i+1)*prime)%n], arr[((i+2)*prime)%n], arr[((i+3)*prime)%n]); +} + +#define E1(arr, i, n) ExtractInRandomOrder(arr, i, n, 1) +#define E2(arr, i, n) ExtractInRandomOrder(arr, i, n, 1787) + +#define M128_M128(func) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; j < numInterestingFloats / 4; ++j) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + __m128 m2 = E2(interesting_floats, j*4, numInterestingFloats); \ + __m128 ret = func(m1, m2); \ + char str[256], str2[256], str3[256]; \ + tostr(&m1, str); tostr(&m2, str2); tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + +#define M128_M128_int(func) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; j < numInterestingFloats / 4; ++j) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + __m128 m2 = E2(interesting_floats, j*4, numInterestingFloats); \ + int ret = func(m1, m2); \ + char str[256], str2[256]; \ + tostr(&m1, str); tostr(&m2, str2); \ + printf("%s(%s, %s) = %d\n", #func, str, str2, ret); \ + } + +#define M128(func) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + __m128 ret = func(m1); \ + char str[256], str2[256]; \ + tostr(&m1, str); tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define M128_M128_shuffle() \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; j < numInterestingFloats / 4; ++j) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + __m128 m2 = E2(interesting_floats, j*4, numInterestingFloats); \ + __m128 ret = _mm_shuffle_ps(m1, m2, _MM_SHUFFLE(1, 
3, 0, 2)); \ + char str[256], str2[256], str3[256]; \ + tostr(&m1, str); tostr(&m2, str2); tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", "_mm_shuffle_ps", str, str2, str3); \ + } + +int main() +{ + float *interesting_floats = get_arr(); + int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]); + assert(numInterestingFloats % 4 == 0); + + // SSE1 Arithmetic instructions: + M128_M128(_mm_add_ps); + M128_M128(_mm_add_ss); + M128_M128(_mm_div_ps); + M128_M128(_mm_div_ss); + M128_M128(_mm_mul_ps); + M128_M128(_mm_mul_ss); + M128_M128(_mm_sub_ps); + M128_M128(_mm_sub_ss); + + // SSE1 Elementary Math functions: + M128(_mm_rcp_ps); + M128(_mm_rcp_ss); + M128(_mm_rsqrt_ps); + M128(_mm_rsqrt_ss); + M128(_mm_sqrt_ps); + M128(_mm_sqrt_ss); + + // SSE1 Logical instructions: + M128_M128(_mm_and_ps); + M128_M128(_mm_andnot_ps); + M128_M128(_mm_or_ps); + M128_M128(_mm_xor_ps); + + // SSE1 Compare instructions: + M128_M128(_mm_cmpeq_ps); + M128_M128(_mm_cmpeq_ss); + M128_M128(_mm_cmpge_ps); + M128_M128(_mm_cmpge_ss); + M128_M128(_mm_cmpgt_ps); + M128_M128(_mm_cmpgt_ss); + M128_M128(_mm_cmple_ps); + M128_M128(_mm_cmple_ss); + M128_M128(_mm_cmplt_ps); + M128_M128(_mm_cmplt_ss); + M128_M128(_mm_cmpneq_ps); + M128_M128(_mm_cmpneq_ss); + M128_M128(_mm_cmpnge_ps); + M128_M128(_mm_cmpnge_ss); + M128_M128(_mm_cmpngt_ps); + M128_M128(_mm_cmpngt_ss); + M128_M128(_mm_cmpnle_ps); + M128_M128(_mm_cmpnle_ss); + M128_M128(_mm_cmpnlt_ps); + M128_M128(_mm_cmpnlt_ss); + M128_M128(_mm_cmpord_ps); + M128_M128(_mm_cmpord_ss); + M128_M128(_mm_cmpunord_ps); + M128_M128(_mm_cmpunord_ss); + + M128_M128_int(_mm_comieq_ss); + M128_M128_int(_mm_comige_ss); + M128_M128_int(_mm_comigt_ss); + M128_M128_int(_mm_comile_ss); + M128_M128_int(_mm_comilt_ss); + M128_M128_int(_mm_comineq_ss); + M128_M128_int(_mm_ucomieq_ss); + M128_M128_int(_mm_ucomige_ss); + M128_M128_int(_mm_ucomigt_ss); + M128_M128_int(_mm_ucomile_ss); + M128_M128_int(_mm_ucomilt_ss); + 
M128_M128_int(_mm_ucomineq_ss); + +/* + // SSE1 Convert instructions: + _mm_cvt_si2ss; + _mm_cvt_ss2si; + _mm_cvtsi32_ss; + _mm_cvtss_f32; + _mm_cvtss_si32; + _mm_cvtss_si64; + _mm_cvtt_ss2si; + _mm_cvttss_si32; + _mm_cvttss_si64; +*/ + +/* + // SSE1 Load functions: + _mm_load_ps + _mm_load_ps1 + _mm_load_ss + _mm_load1_ps + _mm_loadh_pi + _mm_loadl_pi + _mm_loadr_ps + _mm_loadu_ps +*/ + + // SSE1 Miscellaneous functions: +// _mm_movemask_ps + + // SSE1 Move functions: + M128_M128(_mm_move_ss); + M128_M128(_mm_movehl_ps); + M128_M128(_mm_movelh_ps); + +/* + // SSE1 Set functions: + _mm_set_ps + _mm_set_ps1 + _mm_set_ss + _mm_set1_ps + _mm_setr_ps + _mm_setzero_ps +*/ + + // SSE1 Special Math instructions: + M128_M128(_mm_max_ps); + M128_M128(_mm_max_ss); + M128_M128(_mm_min_ps); + M128_M128(_mm_min_ss); + +/* + // SSE1 Store instructions: + _mm_store_ps + _mm_store_ps1 + _mm_store_ss + _mm_store1_ps + _mm_storeh_pi + _mm_storel_pi + _mm_storer_ps + _mm_storeu_ps + _mm_stream_pi + _mm_stream_ps +*/ + + // SSE1 Swizzle instructions: + M128_M128_shuffle(); + // _MM_TRANSPOSE4_PS + M128_M128(_mm_unpackhi_ps); + M128_M128(_mm_unpacklo_ps); +} From 9de07c2bcc43ce6f80cdbfa137b22590a40aba65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 10 Jun 2015 11:38:17 +0300 Subject: [PATCH 02/32] Remove unnecessary relpath in emar. Fixes #3525. 
--- emar | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emar b/emar index adb2858202648..ac013038b44a4 100755 --- a/emar +++ b/emar @@ -40,7 +40,7 @@ if len(newargs) > 2: parts = base_name.split('.') parts[0] += '_' + h newname = '.'.join(parts) - full_newname = os.path.relpath(os.path.join(dir_name, newname)) + full_newname = os.path.join(dir_name, newname) if not os.path.exists(full_newname): try: # it is ok to fail here, we just don't get hashing shutil.copyfile(orig_name, full_newname) From 200e17e940c9f7da459e76b3834a2629815bccd7 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Jun 2015 11:54:48 -0700 Subject: [PATCH 03/32] add fcmp constantexpr testcase --- tests/cases/fcmp_constexpr.ll | 17 +++++++++++++++++ tests/cases/fcmp_constexpr.txt | 1 + 2 files changed, 18 insertions(+) create mode 100644 tests/cases/fcmp_constexpr.ll create mode 100644 tests/cases/fcmp_constexpr.txt diff --git a/tests/cases/fcmp_constexpr.ll b/tests/cases/fcmp_constexpr.ll new file mode 100644 index 0000000000000..4bfa013ce8ceb --- /dev/null +++ b/tests/cases/fcmp_constexpr.ll @@ -0,0 +1,17 @@ +; ModuleID = 'tests/hello_world.bc' +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +@.str = private unnamed_addr constant [20 x i8] c"hello, world %.2f!\0A\00", align 1 + +declare i32 @printf(i8*, ...) + +define i32 @main() { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %waka = select i1 fcmp ult (float fadd (float fmul (float undef, float 1.0), float 2.0), float 3.0), double 4.0, double 5.0 + %call = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i32 0, i32 0), double %waka) + ret i32 0 +} + diff --git a/tests/cases/fcmp_constexpr.txt b/tests/cases/fcmp_constexpr.txt new file mode 100644 index 0000000000000..ca96a9dcd43eb --- /dev/null +++ b/tests/cases/fcmp_constexpr.txt @@ -0,0 +1 @@ +hello, world 4.00! From 5d970d5a49098e203a78995fe72f1f3a1b34f502 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Jun 2015 14:05:41 -0700 Subject: [PATCH 04/32] add missing GL dependency for GLFW #3530 --- src/library_glfw.js | 2 +- tests/glfw_minimal.c | 27 +++++++++++++++++++++++++++ tests/test_browser.py | 4 ++++ 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 tests/glfw_minimal.c diff --git a/src/library_glfw.js b/src/library_glfw.js index 52519b85f540f..631aef9d3cfb9 100644 --- a/src/library_glfw.js +++ b/src/library_glfw.js @@ -32,7 +32,7 @@ ******************************************************************************/ var LibraryGLFW = { - $GLFW__deps: ['emscripten_get_now'], + $GLFW__deps: ['emscripten_get_now', '$GL'], $GLFW: { Window: function(id, width, height, title, monitor, share) { diff --git a/tests/glfw_minimal.c b/tests/glfw_minimal.c new file mode 100644 index 0000000000000..dee45643ef7b2 --- /dev/null +++ b/tests/glfw_minimal.c @@ -0,0 +1,27 @@ +#include +#include +#include +#define GLFW_INCLUDE_ES2 +#include + +int main() { + printf("main function started\n"); + if (glfwInit() != GL_TRUE) { + printf("glfwInit() failed\n"); + glfwTerminate(); + } else { + printf("glfwInit() success\n"); + if (glfwOpenWindow(640, 480, 8, 8, 8, 8, 16, 0, GLFW_WINDOW) != GL_TRUE){ + printf("glfwOpenWindow() failed\n"); + glfwTerminate(); + } else { + printf("glfwOpenWindow() success\n"); + } + } +#ifdef REPORT_RESULT + int result = 1; + REPORT_RESULT(); +#endif + return EXIT_SUCCESS; +} + diff --git a/tests/test_browser.py b/tests/test_browser.py index d38771e55d85c..9a2e61e0b7ae5 100644 --- a/tests/test_browser.py +++ 
b/tests/test_browser.py @@ -1110,6 +1110,10 @@ def test_glfw(self): self.btest('glfw.c', '1', args=['-s', 'LEGACY_GL_EMULATION=1']) self.btest('glfw.c', '1', args=['-s', 'LEGACY_GL_EMULATION=1', '-s', 'USE_GLFW=2']) + def test_glfw_minimal(self): + self.btest('glfw_minimal.c', '1', args=[]) + self.btest('glfw_minimal.c', '1', args=['-s', 'USE_GLFW=2']) + def test_egl(self): open(os.path.join(self.get_dir(), 'test_egl.c'), 'w').write(self.with_report_result(open(path_from_root('tests', 'test_egl.c')).read())) From fe87592d33043be4018b98d3a286bf4f81df82a2 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Jun 2015 15:50:06 -0700 Subject: [PATCH 05/32] remove emterpreter YIELDLIST - it complicates the mental model and is not worth it for the rather slim amount of benefit it provides --- emcc | 2 +- src/library_async.js | 2 +- src/settings.js | 11 ----------- tests/test_browser.py | 4 ++-- tests/test_other.py | 7 ------- tools/emterpretify.py | 29 +---------------------------- tools/js-optimizer.js | 18 +++++++++--------- 7 files changed, 14 insertions(+), 59 deletions(-) diff --git a/emcc b/emcc index cfec4a0effe12..513ff9fb0b147 100755 --- a/emcc +++ b/emcc @@ -1562,7 +1562,7 @@ try: try: # move temp js to final position, alongside its mem init file shutil.move(final, js_target) - args = [shared.PYTHON, shared.path_from_root('tools', 'emterpretify.py'), js_target, final + '.em.js', json.dumps(shared.Settings.EMTERPRETIFY_BLACKLIST), json.dumps(shared.Settings.EMTERPRETIFY_WHITELIST), json.dumps(shared.Settings.EMTERPRETIFY_YIELDLIST), str(shared.Settings.SWAPPABLE_ASM_MODULE)] + args = [shared.PYTHON, shared.path_from_root('tools', 'emterpretify.py'), js_target, final + '.em.js', json.dumps(shared.Settings.EMTERPRETIFY_BLACKLIST), json.dumps(shared.Settings.EMTERPRETIFY_WHITELIST), '', str(shared.Settings.SWAPPABLE_ASM_MODULE)] if shared.Settings.EMTERPRETIFY_ASYNC: args += ['ASYNC=1'] if shared.Settings.EMTERPRETIFY_ADVISE: diff --git a/src/library_async.js 
b/src/library_async.js index 6742cbee8ec2c..f9104f367b5d7 100644 --- a/src/library_async.js +++ b/src/library_async.js @@ -222,7 +222,7 @@ mergeInto(LibraryManager.library, { #if ASSERTIONS abortDecorators.push(function(output, what) { if (EmterpreterAsync.state !== 0) { - return output + '\nThis error happened during an emterpreter-async save or load of the stack. Was there non-emterpreted code on the stack during save (which is unallowed)? You may want to adjust EMTERPRETIFY_BLACKLIST, EMTERPRETIFY_WHITELIST, or EMTERPRETIFY_YIELDLIST (to consider certain functions ok to run during an emscripten_sleep_with_yield).\nThis is what the stack looked like when we tried to save it: ' + [EmterpreterAsync.state, EmterpreterAsync.saveStack]; + return output + '\nThis error happened during an emterpreter-async save or load of the stack. Was there non-emterpreted code on the stack during save (which is unallowed)? You may want to adjust EMTERPRETIFY_BLACKLIST, EMTERPRETIFY_WHITELIST.\nThis is what the stack looked like when we tried to save it: ' + [EmterpreterAsync.state, EmterpreterAsync.saveStack]; } return output; }); diff --git a/src/settings.js b/src/settings.js index b4868d7fd2857..82027660819e1 100644 --- a/src/settings.js +++ b/src/settings.js @@ -464,22 +464,11 @@ var EMTERPRETIFY = 0; // Runs tools/emterpretify on the compiler output var EMTERPRETIFY_BLACKLIST = []; // Functions to not emterpret, that is, to run normally at full speed var EMTERPRETIFY_WHITELIST = []; // If this contains any functions, then only the functions in this list // are emterpreted (as if all the rest are blacklisted; this overrides the BLACKLIST) -var EMTERPRETIFY_YIELDLIST = []; // A list of functions that are allowed to run during while sleeping. 
Typically this is - // during emscripten_sleep_with_yield , but also you may need to add methods to this list - // for things like event handling (an SDL EventHandler will be called from the event, directly - - // if we do that later, you lose out on the whole point of an EventHandler, which is to let - // you react to key presses in order to launch fullscreen, etc.). - // Functions in the yield list do not trigger asserts checking on running during a sleep, - // in ASSERTIONS builds, var EMTERPRETIFY_ASYNC = 0; // Allows sync code in the emterpreter, by saving the call stack, doing an async delay, and resuming it var EMTERPRETIFY_ADVISE = 0; // Performs a static analysis to suggest which functions should be run in the emterpreter, as it // appears they can be on the stack when a sync function is called in the EMTERPRETIFY_ASYNC option. // After showing the suggested list, compilation will halt. You can apply the provided list as an // emcc argument when compiling later. - // This will also advise on the YIELDLIST, if it contains at least one value (it then reports - // all things reachable from that function, as they may need to be in the YIELDLIST as well). - // Note that this depends on things like inlining. If you run this with different inlining than - // when you use the list, it might not work. 
var RUNNING_JS_OPTS = 0; // whether js opts will be run, after the main compiler var BOOTSTRAPPING_STRUCT_INFO = 0; // whether we are in the generate struct_info bootstrap phase diff --git a/tests/test_browser.py b/tests/test_browser.py index 9a2e61e0b7ae5..154c207964026 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -621,7 +621,7 @@ def test_sdl_key(self): ]: for emterps in [ [], - ['-DTEST_SLEEP', '-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-s', 'ASSERTIONS=1', '-s', 'EMTERPRETIFY_YIELDLIST=["_EventHandler"]', '-s', "SAFE_HEAP=1"] + ['-DTEST_SLEEP', '-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-s', 'ASSERTIONS=1', '-s', "SAFE_HEAP=1"] ]: print delay, defines, emterps open(os.path.join(self.get_dir(), 'pre.js'), 'w').write(''' @@ -2400,7 +2400,7 @@ def test_emterpreter_async_sleep2(self): self.btest('emterpreter_async_sleep2.cpp', '1', args=['-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-Oz']) def test_sdl_audio_beep_sleep(self): - self.btest('sdl_audio_beep_sleep.cpp', '1', args=['-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-Os', '-s', 'ASSERTIONS=1', '-s', 'DISABLE_EXCEPTION_CATCHING=0', '-profiling', '-s', 'EMTERPRETIFY_YIELDLIST=["__Z14audio_callbackPvPhi", "__ZN6Beeper15generateSamplesIhEEvPT_i", "__ZN6Beeper15generateSamplesIsEEvPT_i"]', '-s', 'SAFE_HEAP=1']) + self.btest('sdl_audio_beep_sleep.cpp', '1', args=['-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-Os', '-s', 'ASSERTIONS=1', '-s', 'DISABLE_EXCEPTION_CATCHING=0', '-profiling', '-s', 'SAFE_HEAP=1']) def test_mainloop_reschedule(self): self.btest('mainloop_reschedule.cpp', '1', args=['-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-Os']) diff --git a/tests/test_other.py b/tests/test_other.py index 7fcf26ecabb51..aa0845171da7a 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -4418,13 +4418,6 @@ def test_emterpreter_advise(self): out, err = Popen([PYTHON, EMCC, path_from_root('tests', 'emterpreter_advise_funcptr.cpp'), 
'-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-s', 'EMTERPRETIFY_ADVISE=1'], stdout=PIPE).communicate() self.assertContained('-s EMTERPRETIFY_WHITELIST=\'["__Z4posti", "__Z5post2i", "__Z6middlev", "__Z7sleeperv", "__Z8recurserv", "_main"]\'', out) - self.assertNotContained('EMTERPRETIFY_YIELDLIST', out); - - out, err = Popen([PYTHON, EMCC, path_from_root('tests', 'emterpreter_advise_funcptr.cpp'), '-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-s', 'EMTERPRETIFY_ADVISE=1', '-s', 'EMTERPRETIFY_YIELDLIST=["__Z6middlev"]'], stdout=PIPE).communicate() - self.assertContained('-s EMTERPRETIFY_YIELDLIST=\'["__Z6middlev", "__Z7siblingii", "__Z7sleeperv", "__Z8recurserv", "_printf"]\'', out) - - out, err = Popen([PYTHON, EMCC, path_from_root('tests', 'emterpreter_advise_funcptr.cpp'), '-s', 'EMTERPRETIFY=1', '-s', 'EMTERPRETIFY_ASYNC=1', '-s', 'EMTERPRETIFY_ADVISE=1', '-s', 'EMTERPRETIFY_YIELDLIST=["__Z3pref"]'], stdout=PIPE).communicate() - self.assertContained('-s EMTERPRETIFY_YIELDLIST=\'["__Z3pref", "__Z7siblingii", "_printf"]\'', out) def test_link_with_a_static(self): for args in [[], ['-O2']]: diff --git a/tools/emterpretify.py b/tools/emterpretify.py index e713c2e1a805e..cf8667f89daf8 100755 --- a/tools/emterpretify.py +++ b/tools/emterpretify.py @@ -57,7 +57,6 @@ def handle_arg(arg): BLACKLIST = set(['_malloc', '_free', '_memcpy', '_memmove', '_memset', 'copyTempDouble', 'copyTempFloat', '_strlen', 'stackAlloc', 'setThrew', 'stackRestore', 'setTempRet0', 'getTempRet0', 'stackSave', 'runPostSets', '_emscripten_autodebug_double', '_emscripten_autodebug_float', '_emscripten_autodebug_i8', '_emscripten_autodebug_i16', '_emscripten_autodebug_i32', '_emscripten_autodebug_i64', '_strncpy', '_strcpy', '_strcat', '_saveSetjmp', '_testSetjmp', '_emscripten_replace_memory', '_bitshift64Shl', '_bitshift64Ashr', '_bitshift64Lshr', 'setAsyncState', 'emtStackSave']) WHITELIST = [] -YIELDLIST = ['stackSave', 'stackRestore', 'stackAlloc', 'setThrew', '_memset'] # 
functions which are ok to run while doing a sleep_with_yield. SYNC_FUNCS = set(['_emscripten_sleep', '_emscripten_sleep_with_yield', '_emscripten_wget_data', '_emscripten_idb_load', '_emscripten_idb_store', '_emscripten_idb_delete']) @@ -703,8 +702,6 @@ def process(code): infile = sys.argv[1] outfile = sys.argv[2] - original_yieldlist = YIELDLIST - extra_blacklist = [] if len(sys.argv) >= 4: temp = sys.argv[3] @@ -723,13 +720,6 @@ def process(code): WHITELIST = json.loads(temp) if len(sys.argv) >= 6: - temp = sys.argv[5] - if temp[0] == '"': - # response file - assert temp[1] == '@' - temp = open(temp[2:-1]).read() - YIELDLIST = YIELDLIST + json.loads(temp) - if len(sys.argv) >= 7: SWAPPABLE = int(sys.argv[6]) @@ -774,23 +764,6 @@ def process(code): print "Suggested list of functions to run in the emterpreter:" print " -s EMTERPRETIFY_WHITELIST='" + str(sorted(list(advised))).replace("'", '"') + "'" print "(%d%% out of %d functions)" % (int((100.0*len(advised))/len(can_call)), len(can_call)) - if len(YIELDLIST) > len(original_yieldlist): - # advise on the yield list as well. 
Anything a yield function can reach, likely needs to also be a yield function - YIELD_IGNORE = set(['abort']) - to_check = list(YIELDLIST) - advised = set([str(f) for f in YIELDLIST]) - while len(to_check) > 0: - curr = to_check.pop() - if curr not in can_call: continue - for next in can_call[curr]: - if next not in advised: - advised.add(str(next)) - to_check.append(next) - advised = [next for next in advised if not is_dyn_call(next) and not is_function_table(next) and not next in original_yieldlist and next not in SYNC_FUNCS and next not in YIELD_IGNORE and next[0] == '_'] - print - print "Suggested list of yield functions for the emterpreter:" - print " -s EMTERPRETIFY_YIELDLIST='" + str(sorted(list(advised))).replace("'", '"') + "'" - print "(%d%% out of %d functions)" % (int((100.0*len(advised))/len(can_call)), len(can_call)) sys.exit(0) BLACKLIST = set(list(BLACKLIST) + extra_blacklist) @@ -848,7 +821,7 @@ def process(code): external_emterpreted_funcs = filter(lambda func: func in tabled_funcs or func in exported_funcs or func in reachable_funcs, emterpreted_funcs) # process functions, generating bytecode - shared.Building.js_optimizer(infile, ['emterpretify'], extra_info={ 'emterpretedFuncs': list(emterpreted_funcs), 'externalEmterpretedFuncs': list(external_emterpreted_funcs), 'opcodes': OPCODES, 'ropcodes': ROPCODES, 'ASYNC': ASYNC, 'PROFILING': PROFILING, 'ASSERTIONS': ASSERTIONS, 'yieldFuncs': YIELDLIST }, output_filename=temp, just_concat=True) + shared.Building.js_optimizer(infile, ['emterpretify'], extra_info={ 'emterpretedFuncs': list(emterpreted_funcs), 'externalEmterpretedFuncs': list(external_emterpreted_funcs), 'opcodes': OPCODES, 'ropcodes': ROPCODES, 'ASYNC': ASYNC, 'PROFILING': PROFILING, 'ASSERTIONS': ASSERTIONS }, output_filename=temp, just_concat=True) # load the module and modify it asm = asm_module.AsmModule(temp) diff --git a/tools/js-optimizer.js b/tools/js-optimizer.js index f3b83637f4c03..3afa5027fe85d 100644 --- 
a/tools/js-optimizer.js +++ b/tools/js-optimizer.js @@ -5754,7 +5754,6 @@ function emterpretify(ast) { var ASYNC = extraInfo.ASYNC; var PROFILING = extraInfo.PROFILING; var ASSERTIONS = extraInfo.ASSERTIONS; - var yieldFuncs = set(extraInfo.yieldFuncs); var RELATIVE_BRANCHES = set('BR', 'BRT', 'BRF'); var ABSOLUTE_BRANCHES = set('BRA', 'BRTA', 'BRFA'); @@ -5782,6 +5781,14 @@ function emterpretify(ast) { return Array.prototype.slice.call(tempUint8, 0, 8); } + var OK_TO_CALL_WHILE_ASYNC = set('stackSave', 'stackRestore', 'stackAlloc', 'setThrew', '_memset'); // functions which are ok to run while async, even if not emterpreted + function okToCallWhileAsync(name) { + // dynCall *can* be on the stack, they are just bridges; what matters is where they go + if (/^dynCall_/.test(name)) return true; + if (name in OK_TO_CALL_WHILE_ASYNC) return true; + return false; + } + function verifyCode(code, stat) { if (code.length % 4 !== 0) assert(0, JSON.stringify(code)); var len = code.length; @@ -7037,12 +7044,9 @@ function emterpretify(ast) { if (ignore) { // we are not emterpreting this function - if (ASYNC && ASSERTIONS && !/^dynCall_/.test(func[1]) && !(func[1] in yieldFuncs)) { + if (ASYNC && ASSERTIONS && !okToCallWhileAsync(func[1])) { // we need to be careful to never enter non-emterpreted code while doing an async save/restore, // which is what happens if non-emterpreted code is on the stack while we attempt to save. 
- // note that we special-case dynCall, which *can* be on the stack, they are just bridges; what - // matters is where they go - // add asserts right after each call var stack = []; traverse(func, function(node, type) { @@ -7251,10 +7255,6 @@ function emterpretify(ast) { }); if (ASYNC) { argStats.push(['if', srcToExp('(asyncState|0) == 1'), srcToStat('asyncState = 3;')]); // we know we are during a sleep, mark the state - if (ASSERTIONS && !(func[1] in yieldFuncs)) { - argStats.push(['if', srcToExp('((asyncState|0) == 1) | ((asyncState|0) == 3)'), srcToStat('abort(-12) | 0')]); // if *not* a yield func, we should never get here (trampoline entry) - // while sleeping (3, or 1 which has not yet been turned into a 3) - } argStats = [['if', srcToExp('(asyncState|0) != 2'), ['block', argStats]]]; // 2 means restore, so do not trample the stack } func[3] = func[3].concat(argStats); From e025cce13edfc502f2f2bc538311b68324ab17fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Thu, 11 Jun 2015 17:57:15 +0300 Subject: [PATCH 06/32] Add new test test_biggerswitch that aims to check we are able to compile switch-cases of unbounded length. 
--- tests/gen_large_switchcase.py | 27 +++++++++++++++++++++++++++ tests/test_core.py | 5 +++++ 2 files changed, 32 insertions(+) create mode 100644 tests/gen_large_switchcase.py diff --git a/tests/gen_large_switchcase.py b/tests/gen_large_switchcase.py new file mode 100644 index 0000000000000..b027a43975cc4 --- /dev/null +++ b/tests/gen_large_switchcase.py @@ -0,0 +1,27 @@ +import random, sys +num_cases = int(sys.argv[1]) +cases = '' +i = 1 +for x in range(0, num_cases): + cases += ' case ' + str(i) + ': return "' + str(i) + str(i) + str(i) + '";\n' + i += random.randint(1, 5) + +print '''#include +#include +#include + +const char *foo(int x) +{ + switch(x) + { +''' + cases + ''' + default: return ""; + } +} + +int main() +{ + for(int i = 0; i < 100; ++i) + printf("%s\\n", foo((int)(emscripten_get_now() * 1000) % ''' + str(i) + ''')); + printf("Success!\\n"); +}''' diff --git a/tests/test_core.py b/tests/test_core.py index 6baddd3f9ce03..ac78af29f1cfe 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -2224,6 +2224,11 @@ def test_bigswitch(self): 35040: GL_STREAM_DRAW (0x88E0) ''', args=['34962', '26214', '35040']) + def test_biggerswitch(self): + num_cases = 2000 # TODO: Increase this to ~20000 range, since seeing autogenerated code that reaches that many cases. 
+ switch_case, err = Popen([PYTHON, path_from_root('tests', 'gen_large_switchcase.py'), str(num_cases)], stdout=PIPE, stderr=PIPE).communicate() + self.do_run(switch_case, 'Success!') + def test_indirectbr(self): Building.COMPILER_TEST_OPTS = filter(lambda x: x != '-g', Building.COMPILER_TEST_OPTS) From 66755c16d248db12848e0b5dd9fd90e86dcf9d12 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 11 Jun 2015 17:22:36 -0700 Subject: [PATCH 07/32] allow memory growth before the runtime is ready --- emscripten.py | 2 +- tests/test_browser.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/emscripten.py b/emscripten.py index 53976cbaba508..bd9d1336534f9 100755 --- a/emscripten.py +++ b/emscripten.py @@ -675,7 +675,7 @@ def math_fix(g): assert(!runtimeExited, 'the runtime was exited (use NO_EXIT_RUNTIME to keep it alive after main() exits)'); return real_''' + s + '''.apply(null, arguments); }; -''' for s in exported_implemented_functions if s not in ['_malloc', '_free', '_memcpy', '_memset', 'runPostSets']]) +''' for s in exported_implemented_functions if s not in ['_malloc', '_free', '_memcpy', '_memset', 'runPostSets', '_emscripten_replace_memory']]) if not settings['SWAPPABLE_ASM_MODULE']: receiving += ';\n'.join(['var ' + s + ' = Module["' + s + '"] = asm["' + s + '"]' for s in exported_implemented_functions + function_tables]) diff --git a/tests/test_browser.py b/tests/test_browser.py index 154c207964026..cd09296479d4a 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -2518,6 +2518,12 @@ def test_dynamic_link_glemu(self): self.btest(self.in_dir('main.cpp'), '1', args=['-s', 'MAIN_MODULE=1', '-O2', '-s', 'LEGACY_GL_EMULATION=1', '--pre-js', 'pre.js']) + def test_memory_growth_during_startup(self): + open('data.dat', 'w').write('X' * (30*1024*1024)) + self.btest('browser_test_hello_world.c', '0', args=['-s', 'ASSERTIONS=1', '-s', 'ALLOW_MEMORY_GROWTH=1', '-s', 'TOTAL_MEMORY=10000', '-s', 'TOTAL_STACK=5000', '--preload-file', 
'data.dat']) + + # pthreads tests + # Test that the emscripten_ atomics api functions work. def test_pthread_atomics(self): self.btest(path_from_root('tests', 'pthread', 'test_pthread_atomics.cpp'), expected='0', args=['-O3', '-s', 'USE_PTHREADS=1', '-s', 'PTHREAD_POOL_SIZE=8']) From 73c5b88fda65323d5e460b60603f1f8191f3d15c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 11 Jun 2015 17:38:06 -0700 Subject: [PATCH 08/32] use getMemory in file packager - we may allocate before startup is done --- src/preamble.js | 1 + tools/file_packager.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/preamble.js b/src/preamble.js index 3c348a4ef2d7a..5f4b213eb8c71 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -439,6 +439,7 @@ function getMemory(size) { if (typeof _sbrk !== 'undefined' && !_sbrk.called) return Runtime.dynamicAlloc(size); return _malloc(size); } +Module['getMemory'] = getMemory; function Pointer_stringify(ptr, /* optional */ length) { if (length === 0 || !ptr) return ''; diff --git a/tools/file_packager.py b/tools/file_packager.py index 555bddcb2dc2d..168f019edf325 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -499,8 +499,9 @@ def was_seen(name): # Get the big archive and split it up if no_heap_copy: use_data = ''' - // copy the entire loaded file into a spot in the heap. Files will refer to slices in that. They cannot be freed though. - var ptr = Module['_malloc'](byteArray.length); + // copy the entire loaded file into a spot in the heap. Files will refer to slices in that. They cannot be freed though + // (we may be allocating before malloc is ready, during startup). 
+ var ptr = Module['getMemory'](byteArray.length); Module['HEAPU8'].set(byteArray, ptr); DataRequest.prototype.byteArray = Module['HEAPU8'].subarray(ptr, ptr+byteArray.length); ''' From 320d6377752a2aa04ee484d71e40eadc05d0a90b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 11 Jun 2015 17:41:20 -0700 Subject: [PATCH 09/32] if the runtime is not initialized, do not call malloc in getMemory --- src/preamble.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/preamble.js b/src/preamble.js index 5f4b213eb8c71..25c3d302d62d1 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -436,7 +436,7 @@ Module['allocate'] = allocate; // Allocate memory during any stage of startup - static memory early on, dynamic memory later, malloc when ready function getMemory(size) { if (!staticSealed) return Runtime.staticAlloc(size); - if (typeof _sbrk !== 'undefined' && !_sbrk.called) return Runtime.dynamicAlloc(size); + if ((typeof _sbrk !== 'undefined' && !_sbrk.called) || !runtimeInitialized) return Runtime.dynamicAlloc(size); return _malloc(size); } Module['getMemory'] = getMemory; From 887cb524c41f7c20f40c9564e11238aa8810c3c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Fri, 12 Jun 2015 14:53:11 +0300 Subject: [PATCH 10/32] Add a new test case to emscripten_log.cpp to fix a logging crash, and fix it by removing a unnecessarily strict assertion in formatString (code in formatString only needs 4 byte alignment, but asserts 8 bytes) --- src/library_formatString.js | 2 +- tests/emscripten_log/emscripten_log.cpp | 3 +++ tests/test_core.py | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/library_formatString.js b/src/library_formatString.js index f7b91b407e285..3d4ee14341813 100644 --- a/src/library_formatString.js +++ b/src/library_formatString.js @@ -5,7 +5,7 @@ mergeInto(LibraryManager.library, { // Returns the resulting string string as a character array. 
_formatString__deps: ['strlen', '_reallyNegative'], _formatString: function(format, varargs) { - assert((varargs & 7) === 0); + assert((varargs & 3) === 0); var textIndex = format; var argIndex = 0; function getNextArg(type) { diff --git a/tests/emscripten_log/emscripten_log.cpp b/tests/emscripten_log/emscripten_log.cpp index 1a2f3c23c8a79..69760b9cf1b16 100644 --- a/tests/emscripten_log/emscripten_log.cpp +++ b/tests/emscripten_log/emscripten_log.cpp @@ -134,6 +134,9 @@ void __attribute__((noinline)) Foo() // Arbitrary function signature to add some int main() { + int test = 123; + emscripten_log(EM_LOG_FUNC_PARAMS | EM_LOG_DEMANGLE | EM_LOG_CONSOLE, "test print %d\n", test); + Foo(); #ifdef REPORT_RESULT REPORT_RESULT(); diff --git a/tests/test_core.py b/tests/test_core.py index 6baddd3f9ce03..f982a30660cd7 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -7013,7 +7013,8 @@ def test_emscripten_log(self): # XXX Does not work in SpiderMonkey since callstacks cannot be captured when running in asm.js, see https://bugzilla.mozilla.org/show_bug.cgi?id=947996 self.banned_js_engines = [SPIDERMONKEY_ENGINE] if '-g' not in Building.COMPILER_TEST_OPTS: Building.COMPILER_TEST_OPTS.append('-g') - self.do_run('#define RUN_FROM_JS_SHELL\n' + open(path_from_root('tests', 'emscripten_log', 'emscripten_log.cpp')).read(), "Success!") + Building.COMPILER_TEST_OPTS += ['-DRUN_FROM_JS_SHELL'] + self.do_run(open(path_from_root('tests', 'emscripten_log', 'emscripten_log.cpp')).read(), "Success!") def test_float_literals(self): self.do_run_from_file(path_from_root('tests', 'test_float_literals.cpp'), path_from_root('tests', 'test_float_literals.out')) From 3ddc9f28726bcb209829aa83792e8ebbcb99b80c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 12 Jun 2015 11:14:37 -0700 Subject: [PATCH 11/32] remove free() hack in browser.test_runtime_misuse --- tests/runtime_misuse.cpp | 7 +------ tests/runtime_misuse_2.cpp | 3 +-- tests/test_browser.py | 8 +++++++- 3 files changed, 9 
insertions(+), 9 deletions(-) diff --git a/tests/runtime_misuse.cpp b/tests/runtime_misuse.cpp index 7a264d600dbe6..447146bec183b 100644 --- a/tests/runtime_misuse.cpp +++ b/tests/runtime_misuse.cpp @@ -6,18 +6,13 @@ extern "C" { int noted = 0; char* EMSCRIPTEN_KEEPALIVE note(int n) { + EM_ASM_({ Module.noted = $0 }, (int)&noted); EM_ASM_({ Module.print([$0, $1]) }, n, noted); noted += n; EM_ASM_({ Module.print(['noted is now', $0]) }, noted); return (char*)"silly-string"; } -void free(void*) { // free is valid to call even after the runtime closes, so useful as a hack here for this test - EM_ASM_({ Module.print(['reporting', $0]) }, noted); - int result = noted; - REPORT_RESULT(); -} - } int main() { diff --git a/tests/runtime_misuse_2.cpp b/tests/runtime_misuse_2.cpp index 4da63d6651db2..2c901690defcd 100644 --- a/tests/runtime_misuse_2.cpp +++ b/tests/runtime_misuse_2.cpp @@ -6,6 +6,7 @@ extern "C" { int noted = 0; char* EMSCRIPTEN_KEEPALIVE note(int n) { + EM_ASM_({ Module.noted = $0 }, (int)&noted); EM_ASM_({ Module.print([$0, $1]) }, n, noted); noted += n; EM_ASM_({ Module.print(['noted is now', $0]) }, noted); @@ -14,8 +15,6 @@ char* EMSCRIPTEN_KEEPALIVE note(int n) { void free(void*) { // free is valid to call even after the runtime closes, so useful as a hack here for this test EM_ASM_({ Module.print(['reporting', $0]) }, noted); - int result = noted; - REPORT_RESULT(); } } diff --git a/tests/test_browser.py b/tests/test_browser.py index cd09296479d4a..e4af1707af8fd 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1740,7 +1740,13 @@ def test_runtime_misuse(self): doDirectCall(300); } - setTimeout(Module['_free'], 1000); // free is valid to call even after the runtime closes + setTimeout(function() { + var xhr = new XMLHttpRequest(); + assert(Module.noted); + xhr.open('GET', 'http://localhost:8888/report_result?' 
+ HEAP32[Module.noted>>2]); + xhr.send(); + setTimeout(function() { window.close() }, 1000); + }, 1000); ''' open('pre_main.js', 'w').write(r''' From 55ee0f64b65d76f5876a077c1b9f8ea489e3fa02 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 12 Jun 2015 11:15:06 -0700 Subject: [PATCH 12/32] disallow malloc/free calls when runtime is not available --- emscripten.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/emscripten.py b/emscripten.py index bd9d1336534f9..5a7a245ef73b7 100755 --- a/emscripten.py +++ b/emscripten.py @@ -669,13 +669,13 @@ def math_fix(g): receiving = '' if settings['ASSERTIONS']: # assert on the runtime being in a valid state when calling into compiled code. The only exceptions are - # some support code like malloc TODO: verify that malloc is actually safe to use that way + # some support code receiving = '\n'.join(['var real_' + s + ' = asm["' + s + '"]; asm["' + s + '''"] = function() { assert(runtimeInitialized, 'you need to wait for the runtime to be ready (e.g. wait for main() to be called)'); assert(!runtimeExited, 'the runtime was exited (use NO_EXIT_RUNTIME to keep it alive after main() exits)'); return real_''' + s + '''.apply(null, arguments); }; -''' for s in exported_implemented_functions if s not in ['_malloc', '_free', '_memcpy', '_memset', 'runPostSets', '_emscripten_replace_memory']]) +''' for s in exported_implemented_functions if s not in ['_memcpy', '_memset', 'runPostSets', '_emscripten_replace_memory']]) if not settings['SWAPPABLE_ASM_MODULE']: receiving += ';\n'.join(['var ' + s + ' = Module["' + s + '"] = asm["' + s + '"]' for s in exported_implemented_functions + function_tables]) From 8d0e9a831faf4fd768c263f74e746cfa89706905 Mon Sep 17 00:00:00 2001 From: Philipp Wiesemann Date: Sat, 13 Jun 2015 21:15:35 +0200 Subject: [PATCH 13/32] Fix warnings from sphinx in documentation This also fixed broken links. 
--- site/source/conf.py | 2 +- site/source/docs/api_reference/emscripten.h.rst | 9 +++++---- site/source/docs/api_reference/html5.h.rst | 3 ++- site/source/docs/api_reference/preamble.js.rst | 2 ++ .../verify_emscripten_environment.rst | 2 +- site/source/docs/getting_started/test-suite.rst | 2 +- site/source/docs/index.rst | 2 +- .../Talks-and-Publications.rst | 12 ++++++++---- .../porting/connecting_cpp_and_javascript/embind.rst | 4 ++-- site/source/docs/porting/files/packaging_files.rst | 1 + site/source/docs/porting/index.rst | 1 + 11 files changed, 25 insertions(+), 15 deletions(-) diff --git a/site/source/conf.py b/site/source/conf.py index cf2e4e313362b..30658d3ea1a08 100644 --- a/site/source/conf.py +++ b/site/source/conf.py @@ -180,7 +180,7 @@ # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -html_favicon = 'emscripten.ico' +html_favicon = '_static/emscripten.ico' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, diff --git a/site/source/docs/api_reference/emscripten.h.rst b/site/source/docs/api_reference/emscripten.h.rst index a03d13dfdaafd..a33abcd4e4c9c 100644 --- a/site/source/docs/api_reference/emscripten.h.rst +++ b/site/source/docs/api_reference/emscripten.h.rst @@ -247,7 +247,7 @@ Functions :param int mode: The timing mode to use. Allowed values are EM_TIMING_SETTIMEOUT, EM_TIMING_RAF. - :param int value: The timing value to activate for the main loop. This value interpreted differently according to the ``mode``parameter: + :param int value: The timing value to activate for the main loop. 
This value interpreted differently according to the ``mode`` parameter: - If ``mode`` is EM_TIMING_SETTIMEOUT, then ``value`` specifies the number of milliseconds to wait between subsequent ticks to the main loop and updates occur independent of the vsync rate of the display (vsync off). This method uses the JavaScript ``setTimeout`` function to drive the animation. - If ``mode`` is EM_TIMING_RAF, then updates are performed using the ``requestAnimationFrame`` function (with vsync enabled), and this value is interpreted as a "swap interval" rate for the main loop. The value of ``1`` specifies the runtime that it should render at every vsync (typically 60fps), whereas the value ``2`` means that the main loop callback should be called only every second vsync (30fps). As a general formula, the value ``n`` means that the main loop is updated at every n'th vsync, or at a rate of ``60/n`` for 60Hz displays, and ``120/n`` for 120Hz displays. @@ -263,9 +263,10 @@ Functions Returns the current main loop timing mode that is in effect. For interpretation of the values, see the documentation of the function :c:func:`emscripten_set_main_loop_timing`. The timing mode is controlled by calling the functions :c:func:`emscripten_set_main_loop_timing` and :c:func:`emscripten_set_main_loop`. - :param int *mode: If not null, the used timing mode is returned here. - - :param int *value: If not null, the used timing value is returned here. + :param mode: If not null, the used timing mode is returned here. + :type mode: int* + :param value: If not null, the used timing value is returned here. + :type value: int* .. c:function:: void emscripten_set_main_loop_expected_blockers(int num) diff --git a/site/source/docs/api_reference/html5.h.rst b/site/source/docs/api_reference/html5.h.rst index 2de30fccc78e3..916da859900df 100644 --- a/site/source/docs/api_reference/html5.h.rst +++ b/site/source/docs/api_reference/html5.h.rst @@ -1207,7 +1207,8 @@ Functions .. 
note:: This function makes changes to the DOM to satisfy consistent presentation across browsers. These changes have been designed to intrude as little as possible, and the changes are cleared once windowed browsing is restored. If any of these changes are conflicting, see the function :c:func:`emscripten_request_fullscreen` instead, which performs a bare fullscreen request without any modifications to the DOM. - :param const EmscriptenFullscreenStrategy *fullscreenStrategy: [in] Points to a configuration structure filled by the caller which specifies display options for the fullscreen mode. + :param fullscreenStrategy: [in] Points to a configuration structure filled by the caller which specifies display options for the fullscreen mode. + :type fullscreenStrategy: const EmscriptenFullscreenStrategy* .. c:function:: EMSCRIPTEN_RESULT emscripten_exit_fullscreen(void) diff --git a/site/source/docs/api_reference/preamble.js.rst b/site/source/docs/api_reference/preamble.js.rst index 5970282e1a36d..fa7a8a177642a 100644 --- a/site/source/docs/api_reference/preamble.js.rst +++ b/site/source/docs/api_reference/preamble.js.rst @@ -52,7 +52,9 @@ Calling compiled C functions from JavaScript :param ident: The name of the C function to be called. :param returnType: The return type of the function. This can be ``"number"``, ``"string"`` or ``"array"``, which correspond to the appropriate JavaScript types (use ``"number"`` for any C pointer, and ``"array"`` for JavaScript arrays and typed arrays; note that arrays are 8-bit), or for a void function it can be ``null`` (note: the JavaScript ``null`` value, not a string containing the word "null"). + .. note:: 64-bit integers become two 32-bit parameters, for the low and high bits (since 64-bit integers cannot be represented in JavaScript numbers). + :param argTypes: An array of the types of arguments for the function (if there are no arguments, this can be omitted). 
Types are as in ``returnType``, except that ``array`` is not supported as there is no way for us to know the length of the array). :param args: An array of the arguments to the function, as native JavaScript values (as in ``returnType``). Note that string arguments will be stored on the stack (the JavaScript string will become a C string on the stack). :returns: The result of the function call as a native JavaScript value (as in ``returnType``). diff --git a/site/source/docs/building_from_source/verify_emscripten_environment.rst b/site/source/docs/building_from_source/verify_emscripten_environment.rst index 0cadc5d248004..c700287fd3801 100644 --- a/site/source/docs/building_from_source/verify_emscripten_environment.rst +++ b/site/source/docs/building_from_source/verify_emscripten_environment.rst @@ -98,6 +98,6 @@ Other common problems to check for are: python emcc -.. COMMENT:: **HamishW** Need to clarify if this last point on Python2 is Linux/Mac only, and if not, what needs to be done on Windows. +.. COMMENT : **HamishW** Need to clarify if this last point on Python2 is Linux/Mac only, and if not, what needs to be done on Windows. If none of the above is helpful, then please :ref:`contact us ` for help. diff --git a/site/source/docs/getting_started/test-suite.rst b/site/source/docs/getting_started/test-suite.rst index 3f73253508935..84fcb159a1f11 100644 --- a/site/source/docs/getting_started/test-suite.rst +++ b/site/source/docs/getting_started/test-suite.rst @@ -19,7 +19,7 @@ The whole core test suite can be run using the script `tests/runner.py ` - This may take several hours. - :term:`Node.js` cannot run all of the tests in the suite; if you need to run them all, you should get a recent trunk version of the `SpiderMonkey `_ shell. On Windows you can install and activate *SpiderMonkey* using the :ref:`emsdk`. 
diff --git a/site/source/docs/index.rst b/site/source/docs/index.rst index a0332b099fe87..ea2165e5e52e1 100644 --- a/site/source/docs/index.rst +++ b/site/source/docs/index.rst @@ -14,7 +14,7 @@ This comprehensive documentation set contains everything you need to know to use **Emscripten Fundamentals:** - :ref:`integrating-porting-index` illustrates the main differences between the native and Emscripten runtime environments, and explains the changes you need to make to prepare your C/C++ code for the Web. -- :ref:`optimizing-index` shows how to optimise your code for size and performance. +- :ref:`Optimizing-Code` shows how to optimise your code for size and performance. - :ref:`compiling-and-running-projects-index` demonstrates how to integrate Emscripten into your existing project build system. **Contributing:** diff --git a/site/source/docs/introducing_emscripten/Talks-and-Publications.rst b/site/source/docs/introducing_emscripten/Talks-and-Publications.rst index c64a7ead78fe5..6b3e7a010eced 100644 --- a/site/source/docs/introducing_emscripten/Talks-and-Publications.rst +++ b/site/source/docs/introducing_emscripten/Talks-and-Publications.rst @@ -8,10 +8,14 @@ Presentations ============= - Slides from CppCon 2014: - - `Emscripten & asm.js: C++'s role in the modern web `_ (`kripken `_) - - `Video of talk `_ - - `Connecting C++ and JavaScript on the Web with Embind `_ (`chadaustin `_) - - `Video of talk `_ + + - `Emscripten & asm.js: C++'s role in the modern web `_ (`kripken `_) + + - `Video of talk `_ + + - `Connecting C++ and JavaScript on the Web with Embind `_ (`chadaustin `_) + + - `Video of talk `_ - Slides from GDC 2014: `Getting started with asm.js and Emscripten `_ (`kripken `_, `lwagner `_) - Slides from Strange Loop 2013: `Native speed on the web, JavaScript and asm.js `_ (`kripken `_) diff --git a/site/source/docs/porting/connecting_cpp_and_javascript/embind.rst b/site/source/docs/porting/connecting_cpp_and_javascript/embind.rst index 
c03b0335ebcf8..4194c080edaad 100644 --- a/site/source/docs/porting/connecting_cpp_and_javascript/embind.rst +++ b/site/source/docs/porting/connecting_cpp_and_javascript/embind.rst @@ -24,7 +24,7 @@ passed to JavaScript. .. tip:: In addition to the code in this article: - There are many other examples of how to use *Embind* in the `Test Suite`_. - - `Connecting C++ and JavaScript on the Web with Embind>`_ (slides from + - `Connecting C++ and JavaScript on the Web with Embind`_ (slides from CppCon 2014) contains more examples and information about *Embind*'s design philosophy and implementation. @@ -787,7 +787,7 @@ real-world applications has proved to be more than acceptable. .. _Connecting C++ and JavaScript on the Web with Embind: http://chadaustin.me/2014/09/connecting-c-and-javascript-on-the-web-with-embind/ .. _Boost.Python: http://www.boost.org/doc/libs/1_56_0/libs/python/doc/ .. _finalizers: http://en.wikipedia.org/wiki/Finalizer -.. _Boost.Python-like raw pointer policies`: https://wiki.python.org/moin/boost.python/CallPolicy +.. _Boost.Python-like raw pointer policies: https://wiki.python.org/moin/boost.python/CallPolicy .. _Backbone.js: http://backbonejs.org/#Model-extend .. _Web Audio API: https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API .. _Making sine, square, sawtooth and triangle waves: http://stuartmemo.com/making-sine-square-sawtooth-and-triangle-waves/ diff --git a/site/source/docs/porting/files/packaging_files.rst b/site/source/docs/porting/files/packaging_files.rst index bcbb2c3fddba2..d640db8c9a488 100644 --- a/site/source/docs/porting/files/packaging_files.rst +++ b/site/source/docs/porting/files/packaging_files.rst @@ -75,6 +75,7 @@ This model is supported by changing the :js:attr:`Module.filePackagePrefixURL` t .. 
_packaging-files-packaged-file-location: Modifying file locations in the virtual file system +=================================================== The default approach for packaging is to directly map the nested file structure at compile time — relative to the compile-time command prompt directory — to the root of the virtual file system. The ``@`` symbol can be used in a path at build time to *explicitly* specify where the resource will be located in the virtual file system at runtime. diff --git a/site/source/docs/porting/index.rst b/site/source/docs/porting/index.rst index cdbc326fbb6ea..f237ec6470bf4 100644 --- a/site/source/docs/porting/index.rst +++ b/site/source/docs/porting/index.rst @@ -15,6 +15,7 @@ The topics in this section cover the main integration points that you need to co files/index multimedia_and_graphics/index Debugging + pthreads From 70fa0ab9cb00cce9019d5f07deeab5590c25cd45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Jun 2015 17:48:13 +0300 Subject: [PATCH 14/32] Add SSE2 test, and revise the SSE1 test to share implementation. --- tests/test_sse1_full.c | 255 ---------------------------------- tests/test_sse1_full.cpp | 144 +++++++++++++++++++ tests/test_sse2_full.cpp | 290 +++++++++++++++++++++++++++++++++++++++ tests/test_sse_full.h | 257 ++++++++++++++++++++++++++++++++++ 4 files changed, 691 insertions(+), 255 deletions(-) delete mode 100644 tests/test_sse1_full.c create mode 100644 tests/test_sse1_full.cpp create mode 100644 tests/test_sse2_full.cpp create mode 100644 tests/test_sse_full.h diff --git a/tests/test_sse1_full.c b/tests/test_sse1_full.c deleted file mode 100644 index 71e402dda46de..0000000000000 --- a/tests/test_sse1_full.c +++ /dev/null @@ -1,255 +0,0 @@ -// This file uses SSE1 by calling different functions with different interesting inputs and prints the results. -// Use a diff tool to compare the results between platforms. 
- -#include -#include -#include -#include -#include -#include -#include - -// Recasts floating point representation of f to an integer. -uint32_t fcastu(float f) { return *(uint32_t*)&f; } -float ucastf(uint32_t t) { return *(float*)&t; } - -// Data used in test. Store them global and access via a getter to confuse optimizer to not "solve" the whole test suite at compile-time, -// so that the operation will actually be performed at runtime, and not at compile-time. (Testing the capacity of the compiler to perform -// SIMD ops at compile-time would be interesting as well, but that's for another test) -float interesting_floats_[] = { -INFINITY, -FLT_MAX, -2.5f, -1.5f, -1.4f, -1.0f, -0.5f, -0.2f, -FLT_MIN, -0.f, 0.f, - 1.401298464e-45f, FLT_MIN, 0.3f, 0.5f, 0.8f, 1.0f, 1.5f, 2.5f, 3.5f, 3.6f, FLT_MAX, INFINITY, NAN, - ucastf(0x01020304), ucastf(0x80000000), ucastf(0x7FFFFFFF), ucastf(0xFFFFFFFF) }; - -bool always_true() { return time(NULL) != 0; } // This function always returns true, but the compiler should not know this. - -bool IsNan(float f) { return (fcastu(f) << 1) > 0xFF000000u; } - -char *SerializeFloat(float f, char *dstStr) -{ - if (!IsNan(f)) - { - int numChars = sprintf(dstStr, "%.9g", f); - return dstStr + numChars; - } - else - { - uint32_t u = fcastu(f); - int numChars = sprintf(dstStr, "NaN(%8X)", (unsigned int)u); - return dstStr + numChars; - } -} - -void tostr(__m128 *m, char *outstr) -{ - union { __m128 m; float val[4]; } u; - u.m = *m; - char s[4][32]; - SerializeFloat(u.val[0], s[0]); - SerializeFloat(u.val[1], s[1]); - SerializeFloat(u.val[2], s[2]); - SerializeFloat(u.val[3], s[3]); - sprintf(outstr, "[%s,%s,%s,%s]", s[3], s[2], s[1], s[0]); -} - -// Accessors to the test data in a way that the compiler can't optimize at compile-time. -__attribute__((noinline)) float *get_arr() -{ - return always_true() ? 
interesting_floats_ : 0; -} - -__m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) -{ - return _mm_set_ps(arr[(i*prime)%n], arr[((i+1)*prime)%n], arr[((i+2)*prime)%n], arr[((i+3)*prime)%n]); -} - -#define E1(arr, i, n) ExtractInRandomOrder(arr, i, n, 1) -#define E2(arr, i, n) ExtractInRandomOrder(arr, i, n, 1787) - -#define M128_M128(func) \ - for(int i = 0; i < numInterestingFloats / 4; ++i) \ - for(int k = 0; k < 4; ++k) \ - for(int j = 0; j < numInterestingFloats / 4; ++j) \ - { \ - __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ - __m128 m2 = E2(interesting_floats, j*4, numInterestingFloats); \ - __m128 ret = func(m1, m2); \ - char str[256], str2[256], str3[256]; \ - tostr(&m1, str); tostr(&m2, str2); tostr(&ret, str3); \ - printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ - } - -#define M128_M128_int(func) \ - for(int i = 0; i < numInterestingFloats / 4; ++i) \ - for(int k = 0; k < 4; ++k) \ - for(int j = 0; j < numInterestingFloats / 4; ++j) \ - { \ - __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ - __m128 m2 = E2(interesting_floats, j*4, numInterestingFloats); \ - int ret = func(m1, m2); \ - char str[256], str2[256]; \ - tostr(&m1, str); tostr(&m2, str2); \ - printf("%s(%s, %s) = %d\n", #func, str, str2, ret); \ - } - -#define M128(func) \ - for(int i = 0; i < numInterestingFloats / 4; ++i) \ - for(int k = 0; k < 4; ++k) \ - { \ - __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ - __m128 ret = func(m1); \ - char str[256], str2[256]; \ - tostr(&m1, str); tostr(&ret, str2); \ - printf("%s(%s) = %s\n", #func, str, str2); \ - } - -#define M128_M128_shuffle() \ - for(int i = 0; i < numInterestingFloats / 4; ++i) \ - for(int k = 0; k < 4; ++k) \ - for(int j = 0; j < numInterestingFloats / 4; ++j) \ - { \ - __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ - __m128 m2 = E2(interesting_floats, j*4, numInterestingFloats); \ - __m128 ret = _mm_shuffle_ps(m1, m2, _MM_SHUFFLE(1, 
3, 0, 2)); \ - char str[256], str2[256], str3[256]; \ - tostr(&m1, str); tostr(&m2, str2); tostr(&ret, str3); \ - printf("%s(%s, %s) = %s\n", "_mm_shuffle_ps", str, str2, str3); \ - } - -int main() -{ - float *interesting_floats = get_arr(); - int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]); - assert(numInterestingFloats % 4 == 0); - - // SSE1 Arithmetic instructions: - M128_M128(_mm_add_ps); - M128_M128(_mm_add_ss); - M128_M128(_mm_div_ps); - M128_M128(_mm_div_ss); - M128_M128(_mm_mul_ps); - M128_M128(_mm_mul_ss); - M128_M128(_mm_sub_ps); - M128_M128(_mm_sub_ss); - - // SSE1 Elementary Math functions: - M128(_mm_rcp_ps); - M128(_mm_rcp_ss); - M128(_mm_rsqrt_ps); - M128(_mm_rsqrt_ss); - M128(_mm_sqrt_ps); - M128(_mm_sqrt_ss); - - // SSE1 Logical instructions: - M128_M128(_mm_and_ps); - M128_M128(_mm_andnot_ps); - M128_M128(_mm_or_ps); - M128_M128(_mm_xor_ps); - - // SSE1 Compare instructions: - M128_M128(_mm_cmpeq_ps); - M128_M128(_mm_cmpeq_ss); - M128_M128(_mm_cmpge_ps); - M128_M128(_mm_cmpge_ss); - M128_M128(_mm_cmpgt_ps); - M128_M128(_mm_cmpgt_ss); - M128_M128(_mm_cmple_ps); - M128_M128(_mm_cmple_ss); - M128_M128(_mm_cmplt_ps); - M128_M128(_mm_cmplt_ss); - M128_M128(_mm_cmpneq_ps); - M128_M128(_mm_cmpneq_ss); - M128_M128(_mm_cmpnge_ps); - M128_M128(_mm_cmpnge_ss); - M128_M128(_mm_cmpngt_ps); - M128_M128(_mm_cmpngt_ss); - M128_M128(_mm_cmpnle_ps); - M128_M128(_mm_cmpnle_ss); - M128_M128(_mm_cmpnlt_ps); - M128_M128(_mm_cmpnlt_ss); - M128_M128(_mm_cmpord_ps); - M128_M128(_mm_cmpord_ss); - M128_M128(_mm_cmpunord_ps); - M128_M128(_mm_cmpunord_ss); - - M128_M128_int(_mm_comieq_ss); - M128_M128_int(_mm_comige_ss); - M128_M128_int(_mm_comigt_ss); - M128_M128_int(_mm_comile_ss); - M128_M128_int(_mm_comilt_ss); - M128_M128_int(_mm_comineq_ss); - M128_M128_int(_mm_ucomieq_ss); - M128_M128_int(_mm_ucomige_ss); - M128_M128_int(_mm_ucomigt_ss); - M128_M128_int(_mm_ucomile_ss); - M128_M128_int(_mm_ucomilt_ss); - 
M128_M128_int(_mm_ucomineq_ss); - -/* - // SSE1 Convert instructions: - _mm_cvt_si2ss; - _mm_cvt_ss2si; - _mm_cvtsi32_ss; - _mm_cvtss_f32; - _mm_cvtss_si32; - _mm_cvtss_si64; - _mm_cvtt_ss2si; - _mm_cvttss_si32; - _mm_cvttss_si64; -*/ - -/* - // SSE1 Load functions: - _mm_load_ps - _mm_load_ps1 - _mm_load_ss - _mm_load1_ps - _mm_loadh_pi - _mm_loadl_pi - _mm_loadr_ps - _mm_loadu_ps -*/ - - // SSE1 Miscellaneous functions: -// _mm_movemask_ps - - // SSE1 Move functions: - M128_M128(_mm_move_ss); - M128_M128(_mm_movehl_ps); - M128_M128(_mm_movelh_ps); - -/* - // SSE1 Set functions: - _mm_set_ps - _mm_set_ps1 - _mm_set_ss - _mm_set1_ps - _mm_setr_ps - _mm_setzero_ps -*/ - - // SSE1 Special Math instructions: - M128_M128(_mm_max_ps); - M128_M128(_mm_max_ss); - M128_M128(_mm_min_ps); - M128_M128(_mm_min_ss); - -/* - // SSE1 Store instructions: - _mm_store_ps - _mm_store_ps1 - _mm_store_ss - _mm_store1_ps - _mm_storeh_pi - _mm_storel_pi - _mm_storer_ps - _mm_storeu_ps - _mm_stream_pi - _mm_stream_ps -*/ - - // SSE1 Swizzle instructions: - M128_M128_shuffle(); - // _MM_TRANSPOSE4_PS - M128_M128(_mm_unpackhi_ps); - M128_M128(_mm_unpacklo_ps); -} diff --git a/tests/test_sse1_full.cpp b/tests/test_sse1_full.cpp new file mode 100644 index 0000000000000..9e6d1e91f0669 --- /dev/null +++ b/tests/test_sse1_full.cpp @@ -0,0 +1,144 @@ +// This file uses SSE1 by calling different functions with different interesting inputs and prints the results. +// Use a diff tool to compare the results between platforms. 
+ +#include +#include "test_sse_full.h" + +int main() +{ + float *interesting_floats = get_interesting_floats(); + int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]); + assert(numInterestingFloats % 4 == 0); + + // SSE1 Arithmetic instructions: + Ret_M128_M128(__m128, _mm_add_ps); + Ret_M128_M128(__m128, _mm_add_ss); + Ret_M128_M128(__m128, _mm_div_ps); + Ret_M128_M128(__m128, _mm_div_ss); + Ret_M128_M128(__m128, _mm_mul_ps); + Ret_M128_M128(__m128, _mm_mul_ss); + Ret_M128_M128(__m128, _mm_sub_ps); + Ret_M128_M128(__m128, _mm_sub_ss); + + // SSE1 Elementary Math functions: + Ret_M128(__m128, _mm_rcp_ps); + Ret_M128(__m128, _mm_rcp_ss); + Ret_M128(__m128, _mm_rsqrt_ps); + Ret_M128(__m128, _mm_rsqrt_ss); + Ret_M128(__m128, _mm_sqrt_ps); + Ret_M128(__m128, _mm_sqrt_ss); + + // SSE1 Logical instructions: + Ret_M128_M128(__m128, _mm_and_ps); + Ret_M128_M128(__m128, _mm_andnot_ps); + Ret_M128_M128(__m128, _mm_or_ps); + Ret_M128_M128(__m128, _mm_xor_ps); + + // SSE1 Compare instructions: + Ret_M128_M128(__m128, _mm_cmpeq_ps); + Ret_M128_M128(__m128, _mm_cmpeq_ss); + Ret_M128_M128(__m128, _mm_cmpge_ps); + Ret_M128_M128(__m128, _mm_cmpge_ss); + Ret_M128_M128(__m128, _mm_cmpgt_ps); + Ret_M128_M128(__m128, _mm_cmpgt_ss); + Ret_M128_M128(__m128, _mm_cmple_ps); + Ret_M128_M128(__m128, _mm_cmple_ss); + Ret_M128_M128(__m128, _mm_cmplt_ps); + Ret_M128_M128(__m128, _mm_cmplt_ss); + Ret_M128_M128(__m128, _mm_cmpneq_ps); + Ret_M128_M128(__m128, _mm_cmpneq_ss); + Ret_M128_M128(__m128, _mm_cmpnge_ps); + Ret_M128_M128(__m128, _mm_cmpnge_ss); + Ret_M128_M128(__m128, _mm_cmpngt_ps); + Ret_M128_M128(__m128, _mm_cmpngt_ss); + Ret_M128_M128(__m128, _mm_cmpnle_ps); + Ret_M128_M128(__m128, _mm_cmpnle_ss); + Ret_M128_M128(__m128, _mm_cmpnlt_ps); + Ret_M128_M128(__m128, _mm_cmpnlt_ss); + Ret_M128_M128(__m128, _mm_cmpord_ps); + Ret_M128_M128(__m128, _mm_cmpord_ss); + Ret_M128_M128(__m128, _mm_cmpunord_ps); + Ret_M128_M128(__m128, _mm_cmpunord_ss); + + 
Ret_M128_M128(int, _mm_comieq_ss); + Ret_M128_M128(int, _mm_comige_ss); + Ret_M128_M128(int, _mm_comigt_ss); + Ret_M128_M128(int, _mm_comile_ss); + Ret_M128_M128(int, _mm_comilt_ss); + Ret_M128_M128(int, _mm_comineq_ss); + Ret_M128_M128(int, _mm_ucomieq_ss); + Ret_M128_M128(int, _mm_ucomige_ss); + Ret_M128_M128(int, _mm_ucomigt_ss); + Ret_M128_M128(int, _mm_ucomile_ss); + Ret_M128_M128(int, _mm_ucomilt_ss); + Ret_M128_M128(int, _mm_ucomineq_ss); + +/* + // SSE1 Convert instructions: + _mm_cvt_si2ss; + _mm_cvt_ss2si; + _mm_cvtsi32_ss; + _mm_cvtss_f32; + _mm_cvtss_si32; + _mm_cvtss_si64; + _mm_cvtt_ss2si; + _mm_cvttss_si32; + _mm_cvttss_si64; +*/ + +/* + // SSE1 Load functions: + _mm_load_ps + _mm_load_ps1 + _mm_load_ss + _mm_load1_ps + _mm_loadh_pi + _mm_loadl_pi + _mm_loadr_ps + _mm_loadu_ps +*/ + + // SSE1 Miscellaneous functions: +// _mm_movemask_ps + + // SSE1 Move functions: + Ret_M128_M128(__m128, _mm_move_ss); + Ret_M128_M128(__m128, _mm_movehl_ps); + Ret_M128_M128(__m128, _mm_movelh_ps); + +/* + // SSE1 Set functions: + _mm_set_ps + _mm_set_ps1 + _mm_set_ss + _mm_set1_ps + _mm_setr_ps + _mm_setzero_ps +*/ + + // SSE1 Special Math instructions: + Ret_M128_M128(__m128, _mm_max_ps); + Ret_M128_M128(__m128, _mm_max_ss); + Ret_M128_M128(__m128, _mm_min_ps); + Ret_M128_M128(__m128, _mm_min_ss); + +/* + // SSE1 Store instructions: + _mm_store_ps + _mm_store_ps1 + _mm_store_ss + _mm_store1_ps + _mm_storeh_pi + _mm_storel_pi + _mm_storer_ps + _mm_storeu_ps + _mm_stream_pi + _mm_stream_ps +*/ + + // SSE1 Swizzle instructions: + M128_M128_shuffle(); + // _MM_TRANSPOSE4_PS + Ret_M128_M128(__m128, _mm_unpackhi_ps); + Ret_M128_M128(__m128, _mm_unpacklo_ps); +} diff --git a/tests/test_sse2_full.cpp b/tests/test_sse2_full.cpp new file mode 100644 index 0000000000000..c5c0db8a1c27e --- /dev/null +++ b/tests/test_sse2_full.cpp @@ -0,0 +1,290 @@ +// This file uses SSE2 by calling different functions with different interesting inputs and prints the results. 
+// Use a diff tool to compare the results between platforms. + +#include +#include "test_sse_full.h" + +int main() +{ + float *interesting_floats = get_interesting_floats(); + int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]); + assert(numInterestingFloats % 4 == 0); + + uint32_t *interesting_ints = get_interesting_ints(); + int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]); + assert(numInterestingInts % 4 == 0); + + double *interesting_doubles = get_interesting_doubles(); + int numInterestingDoubles = sizeof(interesting_doubles_)/sizeof(interesting_doubles_[0]); + assert(numInterestingDoubles % 4 == 0); + + // SSE2 Arithmetic instructions: + M128i_M128i_M128i(_mm_add_epi16); + M128i_M128i_M128i(_mm_add_epi32); + M128i_M128i_M128i(_mm_add_epi64); + M128i_M128i_M128i(_mm_add_epi8); + Ret_M128d_M128d(__m128d, _mm_add_pd); + Ret_M128d_M128d(__m128d, _mm_add_sd); + + M128i_M128i_M128i(_mm_adds_epi16); + M128i_M128i_M128i(_mm_adds_epi8); + M128i_M128i_M128i(_mm_adds_epu16); + M128i_M128i_M128i(_mm_adds_epu8); + + Ret_M128d_M128d(__m128d, _mm_div_pd); + Ret_M128d_M128d(__m128d, _mm_div_sd); + + M128i_M128i_M128i(_mm_madd_epi16); + M128i_M128i_M128i(_mm_mul_epu32); + + Ret_M128d_M128d(__m128d, _mm_mul_pd); + Ret_M128d_M128d(__m128d, _mm_mul_sd); + + M128i_M128i_M128i(_mm_mulhi_epi16); + M128i_M128i_M128i(_mm_mulhi_epu16); + M128i_M128i_M128i(_mm_mullo_epi16); + M128i_M128i_M128i(_mm_sad_epu8); + M128i_M128i_M128i(_mm_sub_epi16); + M128i_M128i_M128i(_mm_sub_epi32); + M128i_M128i_M128i(_mm_sub_epi64); + M128i_M128i_M128i(_mm_sub_epi8); + + Ret_M128d_M128d(__m128d, _mm_sub_pd); + Ret_M128d_M128d(__m128d, _mm_sub_sd); + + M128i_M128i_M128i(_mm_subs_epi16); + M128i_M128i_M128i(_mm_subs_epi8); + M128i_M128i_M128i(_mm_subs_epu16); + M128i_M128i_M128i(_mm_subs_epu8); + + // SSE2 Cast functions: + Ret_M128d(__m128, _mm_castpd_ps); + Ret_M128d(__m128i, _mm_castpd_si128); + Ret_M128(__m128d, _mm_castps_pd); + 
Ret_M128(__m128i, _mm_castps_si128); + Ret_M128i(__m128d, _mm_castsi128_pd); + Ret_M128i(__m128, _mm_castsi128_ps); + + // SSE2 Compare instructions: + M128i_M128i_M128i(_mm_cmpeq_epi16); + M128i_M128i_M128i(_mm_cmpeq_epi32); + M128i_M128i_M128i(_mm_cmpeq_epi8); + Ret_M128d_M128d(__m128d, _mm_cmpeq_pd); + Ret_M128d_M128d(__m128d, _mm_cmpeq_sd); + Ret_M128d_M128d(__m128d, _mm_cmpge_pd); + Ret_M128d_M128d(__m128d, _mm_cmpge_sd); + M128i_M128i_M128i(_mm_cmpgt_epi16); + M128i_M128i_M128i(_mm_cmpgt_epi32); + M128i_M128i_M128i(_mm_cmpgt_epi8); + Ret_M128d_M128d(__m128d, _mm_cmpgt_pd); + Ret_M128d_M128d(__m128d, _mm_cmpgt_sd); + Ret_M128d_M128d(__m128d, _mm_cmple_pd); + Ret_M128d_M128d(__m128d, _mm_cmple_sd); + M128i_M128i_M128i(_mm_cmplt_epi16); + M128i_M128i_M128i(_mm_cmplt_epi32); + M128i_M128i_M128i(_mm_cmplt_epi8); + Ret_M128d_M128d(__m128d, _mm_cmplt_pd); + Ret_M128d_M128d(__m128d, _mm_cmplt_sd); + Ret_M128d_M128d(__m128d, _mm_cmpneq_pd); + Ret_M128d_M128d(__m128d, _mm_cmpneq_sd); + Ret_M128d_M128d(__m128d, _mm_cmpnge_pd); + Ret_M128d_M128d(__m128d, _mm_cmpnge_sd); + Ret_M128d_M128d(__m128d, _mm_cmpngt_pd); + Ret_M128d_M128d(__m128d, _mm_cmpngt_sd); + Ret_M128d_M128d(__m128d, _mm_cmpnle_pd); + Ret_M128d_M128d(__m128d, _mm_cmpnle_sd); + Ret_M128d_M128d(__m128d, _mm_cmpnlt_pd); + Ret_M128d_M128d(__m128d, _mm_cmpnlt_sd); + Ret_M128d_M128d(__m128d, _mm_cmpord_pd); + Ret_M128d_M128d(__m128d, _mm_cmpord_sd); + Ret_M128d_M128d(__m128d, _mm_cmpunord_pd); + Ret_M128d_M128d(__m128d, _mm_cmpunord_sd); + + Ret_M128d_M128d(int, _mm_comieq_sd); + Ret_M128d_M128d(int, _mm_comige_sd); + Ret_M128d_M128d(int, _mm_comigt_sd); + Ret_M128d_M128d(int, _mm_comile_sd); + Ret_M128d_M128d(int, _mm_comilt_sd); + Ret_M128d_M128d(int, _mm_comineq_sd); + Ret_M128d_M128d(int, _mm_ucomieq_sd); + Ret_M128d_M128d(int, _mm_ucomige_sd); + Ret_M128d_M128d(int, _mm_ucomigt_sd); + Ret_M128d_M128d(int, _mm_ucomile_sd); + Ret_M128d_M128d(int, _mm_ucomilt_sd); + Ret_M128d_M128d(int, _mm_ucomineq_sd); + + // 
SSE2 Convert instructions: + Ret_M128i(__m128d, _mm_cvtepi32_pd); + Ret_M128i(__m128, _mm_cvtepi32_ps); + Ret_M128d(__m128i, _mm_cvtpd_epi32); + Ret_M128d(__m128, _mm_cvtpd_ps); + Ret_M128(__m128i, _mm_cvtps_epi32); + Ret_M128(__m128d, _mm_cvtps_pd); + Ret_M128(double, _mm_cvtsd_f64); + Ret_M128d(int, _mm_cvtsd_si32); + Ret_M128d(int64_t, _mm_cvtsd_si64); +// Ret_M128d(int64_t, _mm_cvtsd_si64x); + Ret_M128i(int, _mm_cvtsi128_si32); + Ret_M128i(int64_t, _mm_cvtsi128_si64); +// Ret_M128i(int64_t, _mm_cvtsi128_si64x); +// M128d_M128d_int(_mm_cvtsi32_sd); + Ret_int(__m128i, _mm_cvtsi32_si128); +// M128d_M128d_int64(_mm_cvtsi64_sd); + Ret_int64(__m128i, _mm_cvtsi64_si128); +// Ret_int64(__m128d, _mm_cvtsi64x_sd); +// Ret_int64(__m128i, _mm_cvtsi64x_si128); + Ret_M128d_M128d(__m128d, _mm_cvtss_sd); + Ret_M128d(__m128i, _mm_cvttpd_epi32); + Ret_M128(__m128i, _mm_cvttps_epi32); + Ret_M128d(int, _mm_cvttsd_si32); + Ret_M128d(int64_t, _mm_cvttsd_si64); +// Ret_M128d(int64_t, _mm_cvttsd_si64x); + + // SSE2 Elementary Math Functions instructions: + Ret_M128d(__m128d, _mm_sqrt_pd); + Ret_M128d(__m128d, _mm_sqrt_ps); + + // SSE2 General Support instructions: + /* + _mm_clflush + _mm_lfence + _mm_mfence + _mm_pause + */ + +/* + // SSE2 Load functions: + _mm_load_pd + _mm_load_pd1 + _mm_load_sd + _mm_load_si128 + _mm_load1_pd + _mm_loadh_pd + _mm_loadl_epi64 + _mm_loadl_pd + _mm_loadr_pd + _mm_loadu_pd + _mm_loadu_si128 +*/ + + // SSE2 Logical instructions: + Ret_M128d_M128d(__m128d, _mm_and_pd); + M128i_M128i_M128i(_mm_and_si128); + Ret_M128d_M128d(__m128d, _mm_andnot_pd); + M128i_M128i_M128i(_mm_andnot_si128); + Ret_M128d_M128d(__m128d, _mm_or_pd); + M128i_M128i_M128i(_mm_or_si128); + Ret_M128d_M128d(__m128d, _mm_xor_pd); + M128i_M128i_M128i(_mm_xor_si128); + + // SSE2 Miscellaneous instructions: + Ret_M128i(int, _mm_movemask_epi8); + Ret_M128d(int, _mm_movemask_pd); + M128i_M128i_M128i(_mm_packs_epi16); + M128i_M128i_M128i(_mm_packs_epi32); + 
M128i_M128i_M128i(_mm_packus_epi16); + M128i_M128i_M128i(_mm_sad_epu8); + + // SSE2 Move instructions: + Ret_M128i(__m128i, _mm_move_epi64); + Ret_M128d_M128d(__m128d, _mm_move_sd); + + // SSE2 Probability/Statistics instructions: + M128i_M128i_M128i(_mm_avg_epu16); + M128i_M128i_M128i(_mm_avg_epu8); + +/* + // SSE2 Set functions: + _mm_set_epi16 + _mm_set_epi32 + _mm_set_epi64 + _mm_set_epi64x + _mm_set_epi8 + _mm_set_pd + _mm_set_pd1 + _mm_set_sd + _mm_set1_epi16 + _mm_set1_epi32 + _mm_set1_epi64 + _mm_set1_epi64x + _mm_set1_epi8 + _mm_set1_pd + _mm_setr_epi16 + _mm_setr_epi32 + _mm_setr_epi64 + _mm_setr_epi8 + _mm_setr_pd + _mm_setzero_pd + _mm_setzero_si128 +*/ + // SSE2 Shift instructions: +// M128i_M128i_int(_mm_bslli_si128); +// M128i_M128i_int(_mm_bsrli_si128); + M128i_M128i_M128i(_mm_sll_epi16); + M128i_M128i_M128i(_mm_sll_epi32); + M128i_M128i_M128i(_mm_sll_epi64); +// M128i_M128i_int(_mm_slli_epi16); +// M128i_M128i_int(_mm_slli_epi32); +// M128i_M128i_int(_mm_slli_epi64); +// M128i_M128i_int(_mm_slli_si128); + M128i_M128i_M128i(_mm_sra_epi16); + M128i_M128i_M128i(_mm_sra_epi32); +// M128i_M128i_int(_mm_srai_epi16); +// M128i_M128i_int(_mm_srai_epi32); + M128i_M128i_M128i(_mm_srl_epi16); + M128i_M128i_M128i(_mm_srl_epi32); + M128i_M128i_M128i(_mm_srl_epi64); +// M128i_M128i_int(_mm_srli_epi16); +// M128i_M128i_int(_mm_srli_epi32); +// M128i_M128i_int(_mm_srli_epi64); +// M128i_M128i_int(_mm_srli_epi128); + + // SSE2 Special Math instructions: + M128i_M128i_M128i(_mm_max_epi16); + M128i_M128i_M128i(_mm_max_epu8); + Ret_M128d_M128d(__m128d, _mm_max_pd); + Ret_M128d_M128d(__m128d, _mm_max_sd); + M128i_M128i_M128i(_mm_min_epi16); + M128i_M128i_M128i(_mm_min_epu8); + Ret_M128d_M128d(__m128d, _mm_min_pd); + Ret_M128d_M128d(__m128d, _mm_min_sd); + +/* + // SSE2 Store instructions: + _mm_maskmoveu_si128 + _mm_store_pd + _mm_store_pd1 + _mm_store_sd + _mm_store_si128 + _mm_store1_pd + _mm_storeh_pd + _mm_storel_epi64 + _mm_storel_pd + _mm_storer_pd + 
_mm_storeu_pd + _mm_storeu_si128 + _mm_stream_pd + _mm_stream_si128 + _mm_stream_si32 + _mm_stream_si64 +*/ + // SSE2 Swizzle instructions: +/* + _mm_extract_epi16 + _mm_insert_epi16 + _mm_shuffle_epi32 + _mm_shuffle_pd + _mm_shufflehi_epi16 + _mm_shufflelo_epi16 +*/ + M128i_M128i_M128i(_mm_unpackhi_epi16); + M128i_M128i_M128i(_mm_unpackhi_epi32); + M128i_M128i_M128i(_mm_unpackhi_epi64); + M128i_M128i_M128i(_mm_unpackhi_epi8); + Ret_M128d_M128d(__m128d, _mm_unpackhi_pd); + M128i_M128i_M128i(_mm_unpacklo_epi16); + M128i_M128i_M128i(_mm_unpacklo_epi32); + M128i_M128i_M128i(_mm_unpacklo_epi64); + M128i_M128i_M128i(_mm_unpacklo_epi8); + Ret_M128d_M128d(__m128d, _mm_unpacklo_pd); +} diff --git a/tests/test_sse_full.h b/tests/test_sse_full.h new file mode 100644 index 0000000000000..5e5695de59868 --- /dev/null +++ b/tests/test_sse_full.h @@ -0,0 +1,257 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +// Recasts floating point representation of f to an integer. +uint32_t fcastu(float f) { return *(uint32_t*)&f; } +uint64_t dcastu(double f) { return *(uint64_t*)&f; } +float ucastf(uint32_t t) { return *(float*)&t; } +double ucastd(uint64_t t) { return *(double*)&t; } + +// Data used in test. Store them global and access via a getter to confuse optimizer to not "solve" the whole test suite at compile-time, +// so that the operation will actually be performed at runtime, and not at compile-time. 
(Testing the capacity of the compiler to perform +// SIMD ops at compile-time would be interesting as well, but that's for another test) +float interesting_floats_[] = { -INFINITY, -FLT_MAX, -2.5f, -1.5f, -1.4f, -1.0f, -0.5f, -0.2f, -FLT_MIN, -0.f, 0.f, + 1.401298464e-45f, FLT_MIN, 0.3f, 0.5f, 0.8f, 1.0f, 1.5f, 2.5f, 3.5f, 3.6f, FLT_MAX, INFINITY, NAN, + ucastf(0x01020304), ucastf(0x80000000), ucastf(0x7FFFFFFF), ucastf(0xFFFFFFFF) }; + +double interesting_doubles_[] = { -INFINITY, -FLT_MAX, -2.5, -1.5, -1.4, -1.0, -0.5, -0.2, -FLT_MIN, -0.0, 0.0, + 1.401298464e-45, FLT_MIN, 0.3, 0.5, 0.8, 1.0, 1.5, 2.5, 3.5, 3.6, FLT_MAX, INFINITY, NAN, + ucastd(0x0102030405060708ULL), ucastd(0x8000000000000000ULL), ucastd(0x7FFFFFFFFFFFFFFFULL), ucastd(0xFFFFFFFFFFFFFFFFULL) }; + +uint32_t interesting_ints_[] = { 0, 1, 2, 3, 0x01020304, 0x10203040, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x12345678, 0x9ABCDEF1, 0x80000000, + 0x80808080, 0x7F7F7F7F, 0x01010101, 0x11111111, 0x20202020, 0x0F0F0F0F, 0xF0F0F0F0, + fcastu(-INFINITY), fcastu(-FLT_MAX), fcastu(-2.5f), fcastu(-1.5f), fcastu(-1.4f), fcastu(-1.0f), fcastu(-0.5f), + fcastu(-0.2f), fcastu(-FLT_MIN), 0xF9301AB9, 0x0039AB12, 0x19302BCD, + fcastu(1.401298464e-45f), fcastu(FLT_MIN), fcastu(0.3f), fcastu(0.5f), fcastu(0.8f), fcastu(1.0f), fcastu(1.5f), + fcastu(2.5f), fcastu(3.5f), fcastu(3.6f), fcastu(FLT_MAX), fcastu(INFINITY), fcastu(NAN) }; + +bool always_true() { return time(NULL) != 0; } // This function always returns true, but the compiler should not know this. 
+ +bool IsNan(float f) { return (fcastu(f) << 1) > 0xFF000000u; } + +char *SerializeFloat(float f, char *dstStr) +{ + if (!IsNan(f)) + { + int numChars = sprintf(dstStr, "%.9g", f); + return dstStr + numChars; + } + else + { + uint32_t u = fcastu(f); + int numChars = sprintf(dstStr, "NaN(0x%8X)", (unsigned int)u); + return dstStr + numChars; + } +} + +char *SerializeDouble(double f, char *dstStr) +{ + if (!IsNan(f)) + { + int numChars = sprintf(dstStr, "%.17g", f); + return dstStr + numChars; + } + else + { + uint64_t u = dcastu(f); + int numChars = sprintf(dstStr, "NaN(0x%08X%08X)", (unsigned int)(u>>32), (unsigned int)u); + return dstStr + numChars; + } +} + +void tostr(__m128 *m, char *outstr) +{ + union { __m128 m; float val[4]; } u; + u.m = *m; + char s[4][32]; + SerializeFloat(u.val[0], s[0]); + SerializeFloat(u.val[1], s[1]); + SerializeFloat(u.val[2], s[2]); + SerializeFloat(u.val[3], s[3]); + sprintf(outstr, "[%s,%s,%s,%s]", s[3], s[2], s[1], s[0]); +} + +void tostr(__m128i *m, char *outstr) +{ + union { __m128i m; uint32_t val[4]; } u; + u.m = *m; + sprintf(outstr, "[0x%08X,0x%08X,0x%08X,0x%08X]", u.val[3], u.val[2], u.val[1], u.val[0]); +} + +void tostr(__m128d *m, char *outstr) +{ + union { __m128d m; double val[2]; } u; + u.m = *m; + char s[2][64]; + SerializeDouble(u.val[0], s[0]); + SerializeDouble(u.val[1], s[1]); + sprintf(outstr, "[%s,%s]", s[1], s[0]); +} + +void tostr(int *m, char *outstr) +{ + sprintf(outstr, "0x%08X", *m); +} + +void tostr(int64_t *m, char *outstr) +{ + sprintf(outstr, "0x%08X%08X", (int)(*m >> 32), (int)*m); +} + +void tostr(float *m, char *outstr) +{ + SerializeFloat(*m, outstr); +} + +void tostr(double *m, char *outstr) +{ + SerializeDouble(*m, outstr); +} + +// Accessors to the test data in a way that the compiler can't optimize at compile-time. +__attribute__((noinline)) float *get_interesting_floats() +{ + return always_true() ? 
interesting_floats_ : 0; +} + +__attribute__((noinline)) uint32_t *get_interesting_ints() +{ + return always_true() ? interesting_ints_ : 0; +} + +__attribute__((noinline)) double *get_interesting_doubles() +{ + return always_true() ? interesting_doubles_ : 0; +} + +__m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) +{ + return _mm_set_ps(arr[(i*prime)%n], arr[((i+1)*prime)%n], arr[((i+2)*prime)%n], arr[((i+3)*prime)%n]); +} + +__m128i ExtractInRandomOrder(uint32_t *arr, int i, int n, int prime) +{ + return _mm_set_epi32(arr[(i*prime)%n], arr[((i+1)*prime)%n], arr[((i+2)*prime)%n], arr[((i+3)*prime)%n]); +} + +__m128d ExtractInRandomOrder(double *arr, int i, int n, int prime) +{ + return _mm_set_pd(arr[(i*prime)%n], arr[((i+1)*prime)%n]); +} + +#define E1(arr, i, n) ExtractInRandomOrder(arr, i, n, 1) +#define E2(arr, i, n) ExtractInRandomOrder(arr, i, n, 1787) + +#define M128i_M128i_M128i(func) \ + for(int i = 0; i < numInterestingInts / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; j < numInterestingInts / 4; ++j) \ + { \ + __m128i m1 = E1(interesting_ints, i*4+k, numInterestingInts); \ + __m128i m2 = E2(interesting_ints, j*4, numInterestingInts); \ + __m128i ret = func(m1, m2); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + +#define Ret_M128d_M128d(Ret_type, func) \ + for(int i = 0; i < numInterestingDoubles / 2; ++i) \ + for(int k = 0; k < 2; ++k) \ + for(int j = 0; j < numInterestingDoubles / 2; ++j) \ + { \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + __m128d m2 = E2(interesting_doubles, j*2, numInterestingDoubles); \ + Ret_type ret = func(m1, m2); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + +#define Ret_M128d(Ret_type, func) \ + for(int i = 0; i 
< numInterestingDoubles / 2; ++i) \ + for(int k = 0; k < 2; ++k) \ + { \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + Ret_type ret = func(m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define Ret_M128(Ret_type, func) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + Ret_type ret = func(m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define Ret_M128i(Ret_type, func) \ + for(int i = 0; i < numInterestingInts / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + { \ + __m128i m1 = E1(interesting_ints, i*4+k, numInterestingInts); \ + Ret_type ret = func(m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define Ret_int(Ret_type, func) \ + for(int i = 0; i < numInterestingInts; ++i) \ + { \ + Ret_type ret = func(interesting_ints[i]); \ + char str[256]; tostr((int*)&interesting_ints[i], str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define Ret_int64(Ret_type, func) \ + for(int i = 0; i < numInterestingInts; ++i) \ + for(int j = 0; j < numInterestingInts; ++j) \ + { \ + int64_t m1 = (int64_t)(((uint64_t)interesting_ints[i]) << 32 | (uint64_t)interesting_ints[j]); \ + Ret_type ret = func(m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define Ret_M128_M128(Ret_type, func) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; j < numInterestingFloats / 4; ++j) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + __m128 m2 = E2(interesting_floats, j*4, 
numInterestingFloats); \ + Ret_type ret = func(m1, m2); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + +#define M128_M128_shuffle() \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; j < numInterestingFloats / 4; ++j) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + __m128 m2 = E2(interesting_floats, j*4, numInterestingFloats); \ + __m128 ret = _mm_shuffle_ps(m1, m2, _MM_SHUFFLE(1, 3, 0, 2)); \ + char str[256], str2[256], str3[256]; \ + tostr(&m1, str); tostr(&m2, str2); tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", "_mm_shuffle_ps", str, str2, str3); \ + } From e68e2c848af4e7766b0f6f3fb092af50f570aeb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Jun 2015 18:04:41 +0300 Subject: [PATCH 15/32] Improve SSE1 test with more cases, fix it to build. 
--- tests/test_core.py | 6 +++--- tests/test_sse1_full.cpp | 22 ++++++++++------------ tests/test_sse_full.h | 23 +++++++++++++---------- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index fa7b111b6df46..8463fff401085 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -5509,14 +5509,14 @@ def test_sse1(self): def test_sse1_full(self): return self.skip('TODO: This test fails due to bugs #2840, #3044, #3045, #3046 and #3048 (also see #3043 and #3049)') if SPIDERMONKEY_ENGINE not in JS_ENGINES: return self.skip('test_sse1_full requires SpiderMonkey to run.') - Popen([CLANG, path_from_root('tests', 'test_sse1_full.c'), '-o', 'test_sse1_full'] + get_clang_native_args(), stdout=PIPE, stderr=PIPE).communicate() + Popen([CLANG, path_from_root('tests', 'test_sse1_full.cpp'), '-o', 'test_sse1_full'] + get_clang_native_args(), stdout=PIPE, stderr=PIPE).communicate() native_result, err = Popen('./test_sse1_full', stdout=PIPE, stderr=PIPE).communicate() Settings.PRECISE_F32 = 1 # SIMD currently requires Math.fround orig_args = self.emcc_args for mode in [[], ['-s', 'SIMD=1']]: - self.emcc_args = orig_args + mode - self.do_run(open(path_from_root('tests', 'test_sse1_full.c'), 'r').read(), native_result) + self.emcc_args = orig_args + mode + ['-I' + path_from_root('tests')] + self.do_run(open(path_from_root('tests', 'test_sse1_full.cpp'), 'r').read(), native_result) def test_simd(self): if self.is_emterpreter(): return self.skip('todo') diff --git a/tests/test_sse1_full.cpp b/tests/test_sse1_full.cpp index 9e6d1e91f0669..de56da4c6c208 100644 --- a/tests/test_sse1_full.cpp +++ b/tests/test_sse1_full.cpp @@ -73,18 +73,16 @@ int main() Ret_M128_M128(int, _mm_ucomilt_ss); Ret_M128_M128(int, _mm_ucomineq_ss); -/* // SSE1 Convert instructions: - _mm_cvt_si2ss; - _mm_cvt_ss2si; - _mm_cvtsi32_ss; - _mm_cvtss_f32; - _mm_cvtss_si32; - _mm_cvtss_si64; - _mm_cvtt_ss2si; - _mm_cvttss_si32; - _mm_cvttss_si64; -*/ +// 
M128_M128_int(_mm_cvt_si2ss); + Ret_M128(int, _mm_cvt_ss2si); +// M128_M128_int(_mm_cvtsi32_ss); + Ret_M128(float, _mm_cvtss_f32); + Ret_M128(int, _mm_cvtss_si32); + Ret_M128(int64_t, _mm_cvtss_si64); + Ret_M128(int, _mm_cvtt_ss2si); + Ret_M128(int, _mm_cvttss_si32); + Ret_M128(int64_t, _mm_cvttss_si64); /* // SSE1 Load functions: @@ -99,7 +97,7 @@ int main() */ // SSE1 Miscellaneous functions: -// _mm_movemask_ps + Ret_M128(int, _mm_movemask_ps); // SSE1 Move functions: Ret_M128_M128(__m128, _mm_move_ss); diff --git a/tests/test_sse_full.h b/tests/test_sse_full.h index 5e5695de59868..e5a6d9af29479 100644 --- a/tests/test_sse_full.h +++ b/tests/test_sse_full.h @@ -77,6 +77,8 @@ void tostr(__m128 *m, char *outstr) sprintf(outstr, "[%s,%s,%s,%s]", s[3], s[2], s[1], s[0]); } +#ifdef ENABLE_SSE2 + void tostr(__m128i *m, char *outstr) { union { __m128i m; uint32_t val[4]; } u; @@ -94,6 +96,17 @@ void tostr(__m128d *m, char *outstr) sprintf(outstr, "[%s,%s]", s[1], s[0]); } +__m128i ExtractInRandomOrder(uint32_t *arr, int i, int n, int prime) +{ + return _mm_set_epi32(arr[(i*prime)%n], arr[((i+1)*prime)%n], arr[((i+2)*prime)%n], arr[((i+3)*prime)%n]); +} + +__m128d ExtractInRandomOrder(double *arr, int i, int n, int prime) +{ + return _mm_set_pd(arr[(i*prime)%n], arr[((i+1)*prime)%n]); +} +#endif + void tostr(int *m, char *outstr) { sprintf(outstr, "0x%08X", *m); @@ -135,16 +148,6 @@ __m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) return _mm_set_ps(arr[(i*prime)%n], arr[((i+1)*prime)%n], arr[((i+2)*prime)%n], arr[((i+3)*prime)%n]); } -__m128i ExtractInRandomOrder(uint32_t *arr, int i, int n, int prime) -{ - return _mm_set_epi32(arr[(i*prime)%n], arr[((i+1)*prime)%n], arr[((i+2)*prime)%n], arr[((i+3)*prime)%n]); -} - -__m128d ExtractInRandomOrder(double *arr, int i, int n, int prime) -{ - return _mm_set_pd(arr[(i*prime)%n], arr[((i+1)*prime)%n]); -} - #define E1(arr, i, n) ExtractInRandomOrder(arr, i, n, 1) #define E2(arr, i, n) ExtractInRandomOrder(arr, 
i, n, 1787) From ca704b70226baf81a732575f29211f5b4f8b9ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Jun 2015 21:40:00 +0300 Subject: [PATCH 16/32] Add testing of SSE1 and SSE2 load instructions. --- tests/test_sse1_full.cpp | 18 ++++---- tests/test_sse2_full.cpp | 25 +++++------ tests/test_sse_full.h | 95 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 23 deletions(-) diff --git a/tests/test_sse1_full.cpp b/tests/test_sse1_full.cpp index de56da4c6c208..4095e6fb17f28 100644 --- a/tests/test_sse1_full.cpp +++ b/tests/test_sse1_full.cpp @@ -84,17 +84,15 @@ int main() Ret_M128(int, _mm_cvttss_si32); Ret_M128(int64_t, _mm_cvttss_si64); -/* // SSE1 Load functions: - _mm_load_ps - _mm_load_ps1 - _mm_load_ss - _mm_load1_ps - _mm_loadh_pi - _mm_loadl_pi - _mm_loadr_ps - _mm_loadu_ps -*/ + Ret_FloatPtr(__m128, _mm_load_ps, 4, 4); + Ret_FloatPtr(__m128, _mm_load_ps1, 1, 1); + Ret_FloatPtr(__m128, _mm_load_ss, 1, 1); + Ret_FloatPtr(__m128, _mm_load1_ps, 1, 1); + Ret_M128_FloatPtr(__m128, _mm_loadh_pi, __m64*, 2, 1); + Ret_M128_FloatPtr(__m128, _mm_loadl_pi, __m64*, 2, 1); + Ret_FloatPtr(__m128, _mm_loadr_ps, 4, 4); + Ret_FloatPtr(__m128, _mm_loadu_ps, 4, 1); // SSE1 Miscellaneous functions: Ret_M128(int, _mm_movemask_ps); diff --git a/tests/test_sse2_full.cpp b/tests/test_sse2_full.cpp index c5c0db8a1c27e..4c683463321db 100644 --- a/tests/test_sse2_full.cpp +++ b/tests/test_sse2_full.cpp @@ -2,6 +2,7 @@ // Use a diff tool to compare the results between platforms. 
#include +#define ENABLE_SSE2 #include "test_sse_full.h" int main() @@ -152,20 +153,18 @@ int main() _mm_pause */ -/* // SSE2 Load functions: - _mm_load_pd - _mm_load_pd1 - _mm_load_sd - _mm_load_si128 - _mm_load1_pd - _mm_loadh_pd - _mm_loadl_epi64 - _mm_loadl_pd - _mm_loadr_pd - _mm_loadu_pd - _mm_loadu_si128 -*/ + Ret_DoublePtr(__m128d, _mm_load_pd, 2, 2); + Ret_DoublePtr(__m128d, _mm_load_pd1, 1, 1); + Ret_DoublePtr(__m128d, _mm_load_sd, 1, 1); + Ret_IntPtr(__m128i, _mm_load_si128, __m128i*, 4, 4); + Ret_DoublePtr(__m128d, _mm_load1_pd, 1, 1); + Ret_M128d_DoublePtr(__m128d, _mm_loadh_pd, double*, 1, 1); + Ret_IntPtr(__m128i, _mm_loadl_epi64, __m128i*, 2, 1); + Ret_M128d_DoublePtr(__m128d, _mm_loadl_pd, double*, 1, 1); + Ret_DoublePtr(__m128d, _mm_loadr_pd, 2, 2); + Ret_DoublePtr(__m128d, _mm_loadu_pd, 2, 1); + Ret_IntPtr(__m128i, _mm_loadu_si128, __m128i*, 2, 1); // SSE2 Logical instructions: Ret_M128d_M128d(__m128d, _mm_and_pd); diff --git a/tests/test_sse_full.h b/tests/test_sse_full.h index e5a6d9af29479..966ecea169ee6 100644 --- a/tests/test_sse_full.h +++ b/tests/test_sse_full.h @@ -127,6 +127,43 @@ void tostr(double *m, char *outstr) SerializeDouble(*m, outstr); } +void tostr(double *m, int numElems, char *outstr) +{ + char s[2][64]; + for(int i = 0; i < numElems; ++i) + SerializeDouble(m[i], s[i]); + switch(numElems) + { + case 1: sprintf(outstr, "{%s}", s[0]); break; + case 2: sprintf(outstr, "{%s,%s}", s[0], s[1]); break; + } +} + +void tostr(float *m, int numElems, char *outstr) +{ + char s[4][64]; + for(int i = 0; i < numElems; ++i) + SerializeFloat(m[i], s[i]); + switch(numElems) + { + case 1: sprintf(outstr, "{%s}", s[0]); break; + case 2: sprintf(outstr, "{%s,%s}", s[0], s[1]); break; + case 3: sprintf(outstr, "{%s,%s,%s}", s[0], s[1], s[2]); break; + case 4: sprintf(outstr, "{%s,%s,%s,%s}", s[0], s[1], s[2], s[3]); break; + } +} + +void tostr(int *s, int numElems, char *outstr) +{ + switch(numElems) + { + case 1: sprintf(outstr, "{0x%08X}", 
s[0]); break; + case 2: sprintf(outstr, "{0x%08X,0x%08X}", s[0], s[1]); break; + case 3: sprintf(outstr, "{0x%08X,0x%08X,0x%08X}", s[0], s[1], s[2]); break; + case 4: sprintf(outstr, "{0x%08X,0x%08X,0x%08X,0x%08X}", s[0], s[1], s[2], s[3]); break; + } +} + // Accessors to the test data in a way that the compiler can't optimize at compile-time. __attribute__((noinline)) float *get_interesting_floats() { @@ -190,6 +227,16 @@ __m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) printf("%s(%s) = %s\n", #func, str, str2); \ } +#define Ret_DoublePtr(Ret_type, func, numElemsAccessed, inc) \ + for(int i = 0; i+numElemsAccessed <= numInterestingDoubles; i += inc) \ + { \ + double *ptr = interesting_doubles + i; \ + Ret_type ret = func(ptr); \ + char str[256]; tostr(ptr, numElemsAccessed, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + #define Ret_M128(Ret_type, func) \ for(int i = 0; i < numInterestingFloats / 4; ++i) \ for(int k = 0; k < 4; ++k) \ @@ -201,6 +248,54 @@ __m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) printf("%s(%s) = %s\n", #func, str, str2); \ } +#define Ret_FloatPtr(Ret_type, func, numElemsAccessed, inc) \ + for(int i = 0; i+numElemsAccessed <= numInterestingFloats; i += inc) \ + { \ + float *ptr = interesting_floats + i; \ + Ret_type ret = func(ptr); \ + char str[256]; tostr(ptr, numElemsAccessed, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define Ret_IntPtr(Ret_type, func, Ptr_type, numElemsAccessed, inc) \ + for(int i = 0; i+numElemsAccessed <= numInterestingInts; i += inc) \ + { \ + uint32_t *ptr = interesting_ints + i; \ + Ret_type ret = func((Ptr_type)ptr); \ + char str[256]; tostr((int*)ptr, numElemsAccessed, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s) = %s\n", #func, str, str2); \ + } + +#define Ret_M128_FloatPtr(Ret_type, func, Ptr_type, numElemsAccessed, inc) \ + for(int i = 0; i < 
numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; j+numElemsAccessed <= numInterestingFloats; j += inc) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + float *ptr = interesting_floats + j; \ + Ret_type ret = func(m1, (Ptr_type)ptr); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(ptr, numElemsAccessed, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + +#define Ret_M128d_DoublePtr(Ret_type, func, Ptr_type, numElemsAccessed, inc) \ + for(int i = 0; i < numInterestingDoubles / 2; ++i) \ + for(int k = 0; k < 2; ++k) \ + for(int j = 0; j+numElemsAccessed <= numInterestingDoubles; j += inc) \ + { \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + double *ptr = interesting_doubles + j; \ + Ret_type ret = func(m1, (Ptr_type)ptr); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(ptr, numElemsAccessed, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + #define Ret_M128i(Ret_type, func) \ for(int i = 0; i < numInterestingInts / 4; ++i) \ for(int k = 0; k < 4; ++k) \ From 79d83cfe40dadf27aa51f22f1290a155825eb008 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Jun 2015 22:26:16 +0300 Subject: [PATCH 17/32] Add testing of SSE1 and SSE2 store instructions. 
--- tests/test_sse1_full.cpp | 21 ++++---- tests/test_sse2_full.cpp | 35 +++++++------ tests/test_sse_full.h | 107 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 30 deletions(-) diff --git a/tests/test_sse1_full.cpp b/tests/test_sse1_full.cpp index 4095e6fb17f28..aa019346e6980 100644 --- a/tests/test_sse1_full.cpp +++ b/tests/test_sse1_full.cpp @@ -118,19 +118,16 @@ int main() Ret_M128_M128(__m128, _mm_min_ps); Ret_M128_M128(__m128, _mm_min_ss); -/* // SSE1 Store instructions: - _mm_store_ps - _mm_store_ps1 - _mm_store_ss - _mm_store1_ps - _mm_storeh_pi - _mm_storel_pi - _mm_storer_ps - _mm_storeu_ps - _mm_stream_pi - _mm_stream_ps -*/ + void_OutFloatPtr_M128(_mm_store_ps, float*, 16, 16); + void_OutFloatPtr_M128(_mm_store_ps1, float*, 16, 16); + void_OutFloatPtr_M128(_mm_store_ss, float*, 4, 1); + void_OutFloatPtr_M128(_mm_store1_ps, float*, 16, 16); + void_OutFloatPtr_M128(_mm_storeh_pi, __m64*, 8, 1); + void_OutFloatPtr_M128(_mm_storel_pi, __m64*, 8, 1); + void_OutFloatPtr_M128(_mm_storer_ps, float*, 16, 16); + void_OutFloatPtr_M128(_mm_storeu_ps, float*, 16, 1); + void_OutFloatPtr_M128(_mm_stream_ps, float*, 16, 16); // SSE1 Swizzle instructions: M128_M128_shuffle(); diff --git a/tests/test_sse2_full.cpp b/tests/test_sse2_full.cpp index 4c683463321db..b0005eac97cfa 100644 --- a/tests/test_sse2_full.cpp +++ b/tests/test_sse2_full.cpp @@ -248,25 +248,24 @@ int main() Ret_M128d_M128d(__m128d, _mm_min_pd); Ret_M128d_M128d(__m128d, _mm_min_sd); -/* // SSE2 Store instructions: - _mm_maskmoveu_si128 - _mm_store_pd - _mm_store_pd1 - _mm_store_sd - _mm_store_si128 - _mm_store1_pd - _mm_storeh_pd - _mm_storel_epi64 - _mm_storel_pd - _mm_storer_pd - _mm_storeu_pd - _mm_storeu_si128 - _mm_stream_pd - _mm_stream_si128 - _mm_stream_si32 - _mm_stream_si64 -*/ + void_M128i_M128i_OutIntPtr(_mm_maskmoveu_si128, char*, 16, 1); + void_OutDoublePtr_M128d(_mm_store_pd, double*, 16, 16); +// void_OutDoublePtr_M128d(_mm_store_pd1, double*, 16, 16); + 
void_OutDoublePtr_M128d(_mm_store_sd, double*, 8, 1); + void_OutIntPtr_M128(_mm_store_si128, __m128i*, 16, 16); + void_OutDoublePtr_M128d(_mm_store1_pd, double*, 16, 16); + void_OutDoublePtr_M128d(_mm_storeh_pd, double*, 8, 1); + void_OutIntPtr_M128(_mm_storel_epi64, __m128i*, 8, 1); + void_OutDoublePtr_M128d(_mm_storel_pd, double*, 8, 1); + void_OutDoublePtr_M128d(_mm_storer_pd, double*, 16, 16); + void_OutDoublePtr_M128d(_mm_storeu_pd, double*, 16, 1); + void_OutIntPtr_M128(_mm_storeu_si128, __m128i*, 16, 1); + void_OutDoublePtr_M128d(_mm_stream_pd, double*, 16, 16); + void_OutIntPtr_M128(_mm_stream_si128, __m128i*, 16, 16); + void_OutIntPtr_int(_mm_stream_si32, int*, 4, 1); + void_OutIntPtr_int64(_mm_stream_si64, int64_t*, 8, 1); + // SSE2 Swizzle instructions: /* _mm_extract_epi16 diff --git a/tests/test_sse_full.h b/tests/test_sse_full.h index 966ecea169ee6..00efb016f50a2 100644 --- a/tests/test_sse_full.h +++ b/tests/test_sse_full.h @@ -164,6 +164,15 @@ void tostr(int *s, int numElems, char *outstr) } } +void tostr(int64_t *m, int numElems, char *outstr) +{ + switch(numElems) + { + case 1: sprintf(outstr, "{0x%08X%08X}", (int)(*m >> 32), (int)*m); break; + case 2: sprintf(outstr, "{0x%08X%08X,0x%08X%08X}", (int)(*m >> 32), (int)*m, (int)(m[1] >> 32), (int)m[1]); + } +} + // Accessors to the test data in a way that the compiler can't optimize at compile-time. 
__attribute__((noinline)) float *get_interesting_floats() { @@ -237,6 +246,104 @@ __m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) printf("%s(%s) = %s\n", #func, str, str2); \ } +float tempOutFloatStore[16]; +float *getTempOutFloatStore(int alignmentBytes) +{ + uintptr_t addr = (uintptr_t)tempOutFloatStore; + addr = (addr + alignmentBytes - 1) & ~(alignmentBytes-1); + return (float*)addr; +} + +int *getTempOutIntStore(int alignmentBytes) { return (int*)getTempOutFloatStore(alignmentBytes); } +double *getTempOutDoubleStore(int alignmentBytes) { return (double*)getTempOutFloatStore(alignmentBytes); } + +#define void_OutFloatPtr_M128(func, Ptr_type, numBytesWritten, alignmentBytes) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int offset = 0; offset < numBytesWritten; offset += alignmentBytes) \ + for(int k = 0; k < 4; ++k) \ + { \ + uintptr_t base = (uintptr_t)getTempOutFloatStore(16); \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + float *out = (float*)(base + offset); \ + func((Ptr_type)out, m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(out, numBytesWritten/sizeof(float), str2); \ + printf("%s(p:align=%d, %s) = %s\n", #func, offset, str, str2); \ + } + +#define void_OutDoublePtr_M128d(func, Ptr_type, numBytesWritten, alignmentBytes) \ + for(int i = 0; i < numInterestingDoubles / 2; ++i) \ + for(int offset = 0; offset < numBytesWritten; offset += alignmentBytes) \ + for(int k = 0; k < 2; ++k) \ + { \ + uintptr_t base = (uintptr_t)getTempOutDoubleStore(16); \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + double *out = (double*)(base + offset); \ + func((Ptr_type)out, m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(out, numBytesWritten/sizeof(double), str2); \ + printf("%s(p:align=%d, %s) = %s\n", #func, offset, str, str2); \ + } + +#define void_OutIntPtr_M128(func, Ptr_type, numBytesWritten, alignmentBytes) \ + for(int i = 0; i < 
numInterestingInts / 4; ++i) \ + for(int offset = 0; offset < numBytesWritten; offset += alignmentBytes) \ + for(int k = 0; k < 4; ++k) \ + { \ + uintptr_t base = (uintptr_t)getTempOutIntStore(16); \ + __m128 m1 = E1(interesting_ints, i*4+k, numInterestingInts); \ + int *out = (int*)(base + offset); \ + func((Ptr_type)out, m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(out, numBytesWritten/sizeof(int), str2); \ + printf("%s(p:align=%d, %s) = %s\n", #func, offset, str, str2); \ + } + +#define void_OutIntPtr_int(func, Ptr_type, numBytesWritten, alignmentBytes) \ + for(int i = 0; i < numInterestingInts; ++i) \ + for(int offset = 0; offset < numBytesWritten; offset += alignmentBytes) \ + for(int k = 0; k < 4; ++k) \ + { \ + uintptr_t base = (uintptr_t)getTempOutIntStore(16); \ + int m1 = interesting_ints[i]; \ + int *out = (int*)(base + offset); \ + func((Ptr_type)out, m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(out, numBytesWritten/sizeof(int), str2); \ + printf("%s(p:align=%d, %s) = %s\n", #func, offset, str, str2); \ + } + +#define void_OutIntPtr_int64(func, Ptr_type, numBytesWritten, alignmentBytes) \ + for(int i = 0; i < numInterestingInts; ++i) \ + for(int j = 0; j < numInterestingInts; ++j) \ + for(int offset = 0; offset < numBytesWritten; offset += alignmentBytes) \ + { \ + uintptr_t base = (uintptr_t)getTempOutIntStore(16); \ + int64_t m1 = (int64_t)(((uint64_t)interesting_ints[i]) << 32 | (uint64_t)interesting_ints[j]); \ + int64_t *out = (int64_t*)(base + offset); \ + func((Ptr_type)out, m1); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(out, numBytesWritten/sizeof(int64_t), str2); \ + printf("%s(p:align=%d, %s) = %s\n", #func, offset, str, str2); \ + } + +#define void_M128i_M128i_OutIntPtr(func, Ptr_type, numBytesWritten, alignmentBytes) \ + for(int i = 0; i < numInterestingInts / 4; ++i) \ + for(int j = 0; j < numInterestingInts / 4; ++j) \ + for(int offset = 0; offset < numBytesWritten; offset += 
alignmentBytes) \ + for(int k = 0; k < 4; ++k) \ + { \ + uintptr_t base = (uintptr_t)getTempOutIntStore(16); \ + __m128d m1 = E1(interesting_ints, i*4+k, numInterestingInts); \ + __m128i m2 = E2(interesting_ints, j*4, numInterestingInts); \ + int *out = (int*)(base + offset); \ + func(m1, m2, (Ptr_type)out); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(out, numBytesWritten/sizeof(int), str3); \ + printf("%s(%s, %s, p:align=%d) = %s\n", #func, str, str2, offset, str3); \ + } + #define Ret_M128(Ret_type, func) \ for(int i = 0; i < numInterestingFloats / 4; ++i) \ for(int k = 0; k < 4; ++k) \ From 8ea6d43ad87955158e075bdc6196bd3f2cf28d5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Jun 2015 22:35:11 +0300 Subject: [PATCH 18/32] Add testing of last SSE2 convert instructions. --- tests/test_sse2_full.cpp | 4 ++-- tests/test_sse_full.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/tests/test_sse2_full.cpp b/tests/test_sse2_full.cpp index b0005eac97cfa..840771d0d6de5 100644 --- a/tests/test_sse2_full.cpp +++ b/tests/test_sse2_full.cpp @@ -128,9 +128,9 @@ int main() Ret_M128i(int, _mm_cvtsi128_si32); Ret_M128i(int64_t, _mm_cvtsi128_si64); // Ret_M128i(int64_t, _mm_cvtsi128_si64x); -// M128d_M128d_int(_mm_cvtsi32_sd); + Ret_M128d_int(__m128d, _mm_cvtsi32_sd); Ret_int(__m128i, _mm_cvtsi32_si128); -// M128d_M128d_int64(_mm_cvtsi64_sd); + Ret_M128d_int64(__m128d, _mm_cvtsi64_sd); Ret_int64(__m128i, _mm_cvtsi64_si128); // Ret_int64(__m128d, _mm_cvtsi64x_sd); // Ret_int64(__m128i, _mm_cvtsi64x_si128); diff --git a/tests/test_sse_full.h b/tests/test_sse_full.h index 00efb016f50a2..afb851f3cb4b0 100644 --- a/tests/test_sse_full.h +++ b/tests/test_sse_full.h @@ -225,6 +225,35 @@ __m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ } +#define Ret_M128d_int(Ret_type, func) \ + 
for(int i = 0; i < numInterestingDoubles / 2; ++i) \ + for(int k = 0; k < 2; ++k) \ + for(int j = 0; j < numInterestingInts; ++j) \ + { \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + int m2 = interesting_ints[j]; \ + Ret_type ret = func(m1, m2); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + +#define Ret_M128d_int64(Ret_type, func) \ + for(int i = 0; i < numInterestingDoubles / 2; ++i) \ + for(int k = 0; k < 2; ++k) \ + for(int j = 0; j < numInterestingInts; ++j) \ + for(int l = 0; l < numInterestingInts; ++l) \ + { \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + int64_t m2 = (int64_t)(((uint64_t)interesting_ints[j]) << 32 | (uint64_t)interesting_ints[l]); \ + Ret_type ret = func(m1, m2); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } + #define Ret_M128d(Ret_type, func) \ for(int i = 0; i < numInterestingDoubles / 2; ++i) \ for(int k = 0; k < 2; ++k) \ From ead260ce077aef960b8f11a00d48f8d6aba0de62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Jun 2015 23:18:48 +0300 Subject: [PATCH 19/32] Add testing for SSE2 Shift and Swizzle instructions. 
--- tests/test_sse1_full.cpp | 2 +- tests/test_sse2_full.cpp | 41 +++++++++-------- tests/test_sse_full.h | 96 ++++++++++++++++++++++++++++++++++------ 3 files changed, 104 insertions(+), 35 deletions(-) diff --git a/tests/test_sse1_full.cpp b/tests/test_sse1_full.cpp index aa019346e6980..6829f23c626e3 100644 --- a/tests/test_sse1_full.cpp +++ b/tests/test_sse1_full.cpp @@ -130,7 +130,7 @@ int main() void_OutFloatPtr_M128(_mm_stream_ps, float*, 16, 16); // SSE1 Swizzle instructions: - M128_M128_shuffle(); + Ret_M128_M128_Tint(__m128, _mm_shuffle_ps); // _MM_TRANSPOSE4_PS Ret_M128_M128(__m128, _mm_unpackhi_ps); Ret_M128_M128(__m128, _mm_unpacklo_ps); diff --git a/tests/test_sse2_full.cpp b/tests/test_sse2_full.cpp index 840771d0d6de5..f308668a01a5c 100644 --- a/tests/test_sse2_full.cpp +++ b/tests/test_sse2_full.cpp @@ -1,7 +1,7 @@ // This file uses SSE2 by calling different functions with different interesting inputs and prints the results. // Use a diff tool to compare the results between platforms. 
-#include +#include #define ENABLE_SSE2 #include "test_sse_full.h" @@ -217,26 +217,26 @@ int main() _mm_setzero_si128 */ // SSE2 Shift instructions: -// M128i_M128i_int(_mm_bslli_si128); -// M128i_M128i_int(_mm_bsrli_si128); +// Ret_M128i_Tint(__m128i, _mm_bslli_si128); +// Ret_M128i_Tint(__m128i, _mm_bsrli_si128); M128i_M128i_M128i(_mm_sll_epi16); M128i_M128i_M128i(_mm_sll_epi32); M128i_M128i_M128i(_mm_sll_epi64); -// M128i_M128i_int(_mm_slli_epi16); -// M128i_M128i_int(_mm_slli_epi32); -// M128i_M128i_int(_mm_slli_epi64); -// M128i_M128i_int(_mm_slli_si128); + Ret_M128i_Tint(__m128i, _mm_slli_epi16); + Ret_M128i_Tint(__m128i, _mm_slli_epi32); + Ret_M128i_Tint(__m128i, _mm_slli_epi64); + Ret_M128i_Tint(__m128i, _mm_slli_si128); M128i_M128i_M128i(_mm_sra_epi16); M128i_M128i_M128i(_mm_sra_epi32); -// M128i_M128i_int(_mm_srai_epi16); -// M128i_M128i_int(_mm_srai_epi32); + Ret_M128i_Tint(__m128i, _mm_srai_epi16); + Ret_M128i_Tint(__m128i, _mm_srai_epi32); M128i_M128i_M128i(_mm_srl_epi16); M128i_M128i_M128i(_mm_srl_epi32); M128i_M128i_M128i(_mm_srl_epi64); -// M128i_M128i_int(_mm_srli_epi16); -// M128i_M128i_int(_mm_srli_epi32); -// M128i_M128i_int(_mm_srli_epi64); -// M128i_M128i_int(_mm_srli_epi128); + Ret_M128i_Tint(__m128i, _mm_srli_epi16); + Ret_M128i_Tint(__m128i, _mm_srli_epi32); + Ret_M128i_Tint(__m128i, _mm_srli_epi64); +// Ret_M128i_Tint(__m128i, _mm_srli_epi128); // SSE2 Special Math instructions: M128i_M128i_M128i(_mm_max_epi16); @@ -267,14 +267,13 @@ int main() void_OutIntPtr_int64(_mm_stream_si64, int64_t*, 8, 1); // SSE2 Swizzle instructions: -/* - _mm_extract_epi16 - _mm_insert_epi16 - _mm_shuffle_epi32 - _mm_shuffle_pd - _mm_shufflehi_epi16 - _mm_shufflelo_epi16 -*/ + Ret_M128i_Tint(int, _mm_extract_epi16); + Ret_M128i_int_Tint(__m128i, _mm_insert_epi16); + Ret_M128i_Tint(__m128i, _mm_shuffle_epi32); + Ret_M128d_M128d_Tint(__m128d, _mm_shuffle_pd); + Ret_M128i_Tint(__m128i, _mm_shufflehi_epi16); + Ret_M128i_Tint(__m128i, _mm_shufflelo_epi16); + 
M128i_M128i_M128i(_mm_unpackhi_epi16); M128i_M128i_M128i(_mm_unpackhi_epi32); M128i_M128i_M128i(_mm_unpackhi_epi64); diff --git a/tests/test_sse_full.h b/tests/test_sse_full.h index afb851f3cb4b0..c0247487faf0c 100644 --- a/tests/test_sse_full.h +++ b/tests/test_sse_full.h @@ -211,6 +211,89 @@ __m128 ExtractInRandomOrder(float *arr, int i, int n, int prime) printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ } +#define Ret_M128i_Tint_body(Ret_type, func, Tint) \ + for(int i = 0; i < numInterestingInts / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + { \ + __m128i m1 = E1(interesting_ints, i*4+k, numInterestingInts); \ + Ret_type ret = func(m1, Tint); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s, %d) = %s\n", #func, str, Tint, str2); \ + } + +#define Ret_M128i_int_Tint_body(Ret_type, func, Tint) \ + for(int i = 0; i < numInterestingInts / 4; ++i) \ + for(int j = 0; j < numInterestingInts; ++j) \ + for(int k = 0; k < 4; ++k) \ + { \ + __m128i m1 = E1(interesting_ints, i*4+k, numInterestingInts); \ + Ret_type ret = func(m1, interesting_ints[j], Tint); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&ret, str2); \ + printf("%s(%s, 0x%08X, %d) = %s\n", #func, str, interesting_ints[j], Tint, str2); \ + } + +#define Ret_M128d_M128d_Tint_body(Ret_type, func, Tint) \ + for(int i = 0; i < numInterestingDoubles / 2; ++i) \ + for(int k = 0; k < 2; ++k) \ + for(int j = 0; j < numInterestingDoubles / 2; ++j) \ + { \ + __m128d m1 = E1(interesting_doubles, i*2+k, numInterestingDoubles); \ + __m128d m2 = E2(interesting_doubles, j*2, numInterestingDoubles); \ + Ret_type ret = func(m1, m2, Tint); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s, %d) = %s\n", #func, str, str2, Tint, str3); \ + } + +#define Ret_M128_M128_Tint_body(Ret_type, func, Tint) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + 
for(int j = 0; j < numInterestingFloats / 4; ++j) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + __m128 m2 = E2(interesting_floats, j*4, numInterestingFloats); \ + Ret_type ret = func(m1, m2, Tint); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s, %d) = %s\n", #func, str, str2, Tint, str3); \ + } + +#define const_int8_unroll(Ret_type, F, func) \ + F(Ret_type, func, -1); \ + F(Ret_type, func, 0); \ + F(Ret_type, func, 1); \ + F(Ret_type, func, 2); \ + F(Ret_type, func, 3); \ + F(Ret_type, func, 5); \ + F(Ret_type, func, 7); \ + F(Ret_type, func, 11); \ + F(Ret_type, func, 13); \ + F(Ret_type, func, 15); \ + F(Ret_type, func, 16); \ + F(Ret_type, func, 17); \ + F(Ret_type, func, 23); \ + F(Ret_type, func, 29); \ + F(Ret_type, func, 31); \ + F(Ret_type, func, 37); \ + F(Ret_type, func, 43); \ + F(Ret_type, func, 47); \ + F(Ret_type, func, 59); \ + F(Ret_type, func, 127); \ + F(Ret_type, func, 128); \ + F(Ret_type, func, 191); \ + F(Ret_type, func, 254); \ + F(Ret_type, func, 255); \ + F(Ret_type, func, 309); + +#define Ret_M128i_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128i_Tint_body, func) +#define Ret_M128i_int_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128i_int_Tint_body, func) +#define Ret_M128d_M128d_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128d_M128d_Tint_body, func) +#define Ret_M128_M128_Tint(Ret_type, func) const_int8_unroll(Ret_type, Ret_M128_M128_Tint_body, func) + #define Ret_M128d_M128d(Ret_type, func) \ for(int i = 0; i < numInterestingDoubles / 2; ++i) \ for(int k = 0; k < 2; ++k) \ @@ -476,16 +559,3 @@ double *getTempOutDoubleStore(int alignmentBytes) { return (double*)getTempOutFl char str3[256]; tostr(&ret, str3); \ printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ } - -#define M128_M128_shuffle() \ - for(int i = 0; i < numInterestingFloats / 4; ++i) \ - for(int k = 0; k < 4; ++k) \ - for(int 
j = 0; j < numInterestingFloats / 4; ++j) \ - { \ - __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ - __m128 m2 = E2(interesting_floats, j*4, numInterestingFloats); \ - __m128 ret = _mm_shuffle_ps(m1, m2, _MM_SHUFFLE(1, 3, 0, 2)); \ - char str[256], str2[256], str3[256]; \ - tostr(&m1, str); tostr(&m2, str2); tostr(&ret, str3); \ - printf("%s(%s, %s) = %s\n", "_mm_shuffle_ps", str, str2, str3); \ - } From 72e6cc388b3a51a6f77321e2e95620412558a2b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Jun 2015 23:22:04 +0300 Subject: [PATCH 20/32] Add testing for SSE1 int cvt instructions. --- tests/test_sse1_full.cpp | 8 ++++++-- tests/test_sse_full.h | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/tests/test_sse1_full.cpp b/tests/test_sse1_full.cpp index 6829f23c626e3..c5efdb3f57cb1 100644 --- a/tests/test_sse1_full.cpp +++ b/tests/test_sse1_full.cpp @@ -10,6 +10,10 @@ int main() int numInterestingFloats = sizeof(interesting_floats_)/sizeof(interesting_floats_[0]); assert(numInterestingFloats % 4 == 0); + uint32_t *interesting_ints = get_interesting_ints(); + int numInterestingInts = sizeof(interesting_ints_)/sizeof(interesting_ints_[0]); + assert(numInterestingInts % 4 == 0); + // SSE1 Arithmetic instructions: Ret_M128_M128(__m128, _mm_add_ps); Ret_M128_M128(__m128, _mm_add_ss); @@ -74,9 +78,9 @@ int main() Ret_M128_M128(int, _mm_ucomineq_ss); // SSE1 Convert instructions: -// M128_M128_int(_mm_cvt_si2ss); + Ret_M128_int(__m128, _mm_cvt_si2ss); Ret_M128(int, _mm_cvt_ss2si); -// M128_M128_int(_mm_cvtsi32_ss); + Ret_M128_int(__m128, _mm_cvtsi32_ss); Ret_M128(float, _mm_cvtss_f32); Ret_M128(int, _mm_cvtss_si32); Ret_M128(int64_t, _mm_cvtss_si64); diff --git a/tests/test_sse_full.h b/tests/test_sse_full.h index c0247487faf0c..ba03509b13618 100644 --- a/tests/test_sse_full.h +++ b/tests/test_sse_full.h @@ -559,3 +559,17 @@ double *getTempOutDoubleStore(int alignmentBytes) { return 
(double*)getTempOutFl char str3[256]; tostr(&ret, str3); \ printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ } + +#define Ret_M128_int(Ret_type, func) \ + for(int i = 0; i < numInterestingFloats / 4; ++i) \ + for(int k = 0; k < 4; ++k) \ + for(int j = 0; j < numInterestingInts; ++j) \ + { \ + __m128 m1 = E1(interesting_floats, i*4+k, numInterestingFloats); \ + int m2 = interesting_ints[j]; \ + Ret_type ret = func(m1, m2); \ + char str[256]; tostr(&m1, str); \ + char str2[256]; tostr(&m2, str2); \ + char str3[256]; tostr(&ret, str3); \ + printf("%s(%s, %s) = %s\n", #func, str, str2, str3); \ + } From b424dc0d851851913262ac85ef6f68162dff67a7 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sun, 14 Jun 2015 10:57:09 -0700 Subject: [PATCH 21/32] Fix shared libraries in autoconf projects on Linux host Partial revert of commit 53a969d, plus addition of comment explaining why the verbiage about ld compatibility is there. Added a check for the 'GNU' token in 'emcc -v' output to tests. --- AUTHORS | 1 + emcc | 3 ++- tests/test_other.py | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 6ab0b936eacef..26c8c24f799d2 100644 --- a/AUTHORS +++ b/AUTHORS @@ -193,3 +193,4 @@ a license to everyone to use it as detailed in LICENSE.) 
* Tim Guan-tin Chien * Krzysztof Jakubowski * Vladimír Vondruš +* Brion Vibber diff --git a/emcc b/emcc index 0559dd6033788..ec9960f3ecf29 100755 --- a/emcc +++ b/emcc @@ -172,7 +172,8 @@ There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR P exit(0) elif len(sys.argv) == 2 and sys.argv[1] == '-v': # -v with no inputs - print 'emcc (Emscripten gcc/clang-like replacement) %s' % shared.EMSCRIPTEN_VERSION + # autoconf likes to see 'GNU' in the output to enable shared object support + print 'emcc (Emscripten gcc/clang-like replacement + linker emulating GNU ld) %s' % shared.EMSCRIPTEN_VERSION code = subprocess.call([shared.CLANG, '-v']) shared.check_sanity(force=True) exit(code) diff --git a/tests/test_other.py b/tests/test_other.py index 62d186fbcf7ba..b8ba16633479a 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -22,6 +22,7 @@ def test_emcc(self): # -v, without input files output = Popen([PYTHON, compiler, '-v'], stdout=PIPE, stderr=PIPE).communicate() self.assertContained('''clang version''', output[1].replace('\r', ''), output[1].replace('\r', '')) + self.assertContained('''GNU''', output[0]) # --help output = Popen([PYTHON, compiler, '--help'], stdout=PIPE, stderr=PIPE).communicate() From 8f44c9f77a496a299ce602e5fd6f345338135069 Mon Sep 17 00:00:00 2001 From: Martin von Gagern Date: Thu, 28 May 2015 20:51:34 +0200 Subject: [PATCH 22/32] Store memory initialization in string literal if requested MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With “--memory-init-file 0 -s MEM_INIT_METHOD=2” on the command line, the generated JavaScript file will contain a string literal representing the initial content of the memory buffer. The MEM_INIT_METHOD defaults to 0 but gets set to 1 if --memory-init-file is being used. Setting it to 1 without --memory-init-file will cause an error. That way, we can use the setting in the postamble, without too many changes in other places. 
Since memory is initialized to all zero, trailing zeros can be omitted. This change affects the file-based initialization as well. --- emcc | 20 ++++++++++++++++++-- src/postamble.js | 12 ++++++++++++ src/settings.js | 5 +++++ tests/runner.py | 16 +++++++++------- tests/test_core.py | 3 ++- 5 files changed, 46 insertions(+), 10 deletions(-) diff --git a/emcc b/emcc index 025daaf9dcecc..f6acd68de6612 100755 --- a/emcc +++ b/emcc @@ -1289,6 +1289,10 @@ try: # Emscripten logging.debug('LLVM => JS') extra_args = [] if not js_libraries else ['--libraries', ','.join(map(os.path.abspath, js_libraries))] + if memory_init_file: + shared.Settings.MEM_INIT_METHOD = 1 + elif shared.Settings.MEM_INIT_METHOD == 1: + shared.Settings.MEM_INIT_METHOD = 0 final = shared.Building.emscripten(final, append_ext=False, extra_args=extra_args) if DEBUG: save_intermediate('original') @@ -1340,14 +1344,26 @@ try: js_transform_tempfiles = [final] - if memory_init_file: + if memory_init_file or shared.Settings.MEM_INIT_METHOD == 2: memfile = target + '.mem' shared.try_delete(memfile) def repl(m): # handle chunking of the memory initializer s = m.groups(0)[0] if len(s) == 0 and not shared.Settings.EMTERPRETIFY: return m.group(0) # emterpreter must have a mem init file; otherwise, don't emit 0-size ones - open(memfile, 'wb').write(''.join(map(lambda x: chr(int(x or '0')), s.split(',')))) + membytes = [int(x or '0') for x in s.split(',')] + if not shared.Settings.EMTERPRETIFY: + while membytes and membytes[-1] == 0: + membytes.pop() + if not membytes: + return ''; + membytes = ''.join(map(chr, membytes)) + if not memory_init_file: + s = repr(membytes) + hex_to_octal = lambda x: '\\%o' % int(x.group(1), 16) + s = re.sub(r'\\x([0-1][0-9A-Fa-f])(?:(?=[^0-9])|$)', hex_to_octal, s) + return 'var memoryInitializer = %s;' % s + open(memfile, 'wb').write(membytes) if DEBUG: # Copy into temp dir as well, so can be run there too shared.safe_copy(memfile, os.path.join(shared.get_emscripten_temp_dir(), 
os.path.basename(memfile))) diff --git a/src/postamble.js b/src/postamble.js index abc9510f2d649..2571cb24a403b 100644 --- a/src/postamble.js +++ b/src/postamble.js @@ -1,6 +1,16 @@ // === Auto-generated postamble setup entry stuff === +#if MEM_INIT_METHOD == 2 +#if USE_PTHREADS +if (memoryInitializer && !ENVIRONMENT_IS_PTHREAD) (function(s) { +#else +if (memoryInitializer) (function(s) { +#endif + for (var i = 0; i < s.length; ++i) HEAPU8[STATIC_BASE + i] = s.charCodeAt(i); +})(memoryInitializer); +#else +#if MEM_INIT_METHOD == 1 #if USE_PTHREADS if (memoryInitializer && !ENVIRONMENT_IS_PTHREAD) { #else @@ -52,6 +62,8 @@ if (memoryInitializer) { } } } +#endif +#endif function ExitStatus(status) { this.name = "ExitStatus"; diff --git a/src/settings.js b/src/settings.js index b4868d7fd2857..d7a637c636575 100644 --- a/src/settings.js +++ b/src/settings.js @@ -35,6 +35,11 @@ var INVOKE_RUN = 1; // Whether we will run the main() function. Disable if you e // can do with Module.callMain(), with an optional parameter of commandline args). var NO_EXIT_RUNTIME = 0; // If set, the runtime is not quit when main() completes (allowing code to // run afterwards, for example from the browser main event loop). +var MEM_INIT_METHOD = 0; // How to represent the initial memory content. + // 0: keep array literal representing the initial memory data + // 1: create a *.mem file containing the binary data of the initial memory; + // use the --memory-init-file command line switch to select this method + // 2: embed a string literal representing that initial memory data var TOTAL_STACK = 5*1024*1024; // The total stack size. There is no way to enlarge the stack, so this // value must be large enough for the program's requirements. 
If // assertions are on, we will assert on not exceeding this, otherwise, diff --git a/tests/runner.py b/tests/runner.py index 98336b7c76911..a7c6428ba0a6e 100755 --- a/tests/runner.py +++ b/tests/runner.py @@ -62,6 +62,14 @@ def skipme(self): # used by tests we ask on the commandline to be skipped, see r def is_emterpreter(self): return False + def uses_memory_init_file(self): + if self.emcc_args is None: + return None + elif '--memory-init-file' in self.emcc_args: + return int(self.emcc_args[self.emcc_args.index('--memory-init-file')+1]) + else: + return ('-O2' in self.emcc_args or '-O3' in self.emcc_args or '-Oz' in self.emcc_args) and not Settings.SIDE_MODULE + def setUp(self): Settings.reset() self.banned_js_engines = [] @@ -252,16 +260,10 @@ def build(self, src, dirname, filename, output_processor=None, main_file=None, a output_processor(open(filename + '.o.js').read()) if self.emcc_args is not None: - if '--memory-init-file' in self.emcc_args: - memory_init_file = int(self.emcc_args[self.emcc_args.index('--memory-init-file')+1]) - else: - memory_init_file = ('-O2' in self.emcc_args or '-O3' in self.emcc_args or '-Oz' in self.emcc_args) and not Settings.SIDE_MODULE src = open(filename + '.o.js').read() - if memory_init_file: + if self.uses_memory_init_file(): # side memory init file, or an empty one in the js assert ('/* memory initializer */' not in src) or ('/* memory initializer */ allocate([]' in src) - else: - assert 'memory initializer */' in src or '/*' not in src # memory initializer comment, or cleaned-up source with no comments def validate_asmjs(self, err): if 'uccessfully compiled asm.js code' in err and 'asm.js link error' not in err: diff --git a/tests/test_core.py b/tests/test_core.py index be17f34be11d5..5d6b7792c37b4 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -4549,7 +4549,7 @@ def process(filename): try_delete(mem_file) self.do_run(src, ('size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi 
there!\ntexto\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\n5 bytes to dev/null: 5\nok.\n \ntexte\n', 'size: 7\ndata: 100,-56,50,25,10,77,123\nloop: 100 -56 50 25 10 77 123 \ninput:hi there!\ntexto\ntexte\n$\n5 : 10,30,20,11,88\nother=some data.\nseeked=me da.\nseeked=ata.\nseeked=ta.\nfscanfed: 10 - hello\n5 bytes to dev/null: 5\nok.\n'), post_build=post, extra_emscripten_args=['-H', 'libc/fcntl.h']) - if '-O2' in self.emcc_args: + if self.uses_memory_init_file(): assert os.path.exists(mem_file) def test_files_m(self): @@ -7387,6 +7387,7 @@ def setUp(self): asm2g = make_run("asm2g", compiler=CLANG, emcc_args=["-O2", "-g", "-s", "ASSERTIONS=1", "-s", "SAFE_HEAP=1"]) asm1i = make_run("asm1i", compiler=CLANG, emcc_args=["-O1", '-s', 'EMTERPRETIFY=1']) asm3i = make_run("asm3i", compiler=CLANG, emcc_args=["-O3", '-s', 'EMTERPRETIFY=1']) +asm2m = make_run("asm2m", compiler=CLANG, emcc_args=["-O2", "--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2"]) # Legacy test modes - asm2nn = make_run("asm2nn", compiler=CLANG, emcc_args=["-O2"], env={"EMCC_NATIVE_OPTIMIZER": "0"}) From c388aeebe7bb7c05d70cd30b9387ec9a85ac3962 Mon Sep 17 00:00:00 2001 From: Martin von Gagern Date: Fri, 29 May 2015 10:11:45 +0200 Subject: [PATCH 23/32] Avoid octal escapes, use raw bytes instead There was a bug where the hex-to-oct conversion would match \\x01. But support for octal escape sequences is optional in any case, and forbidden in strict mode, so we should avoid using these. As per the ECMAScript 5.1 spec, any source character (which may be any unicode code point) can be used inside a string literal, with the exception of backslash, line terminator or the quoting character. So we do just that: dump a lot of raw bytes into the string literal and escape only what needs to be escaped. 
There is one catch, though: sources are usually encoded in UTF-8, in which case we can't exactly plug in raw bytes, but have to use UTF-8 sequences for the range \x80 through \xff. This may cause problems if the source file is NOT interpreted as UTF-8. --- emcc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/emcc b/emcc index f6acd68de6612..c3cb050e336ba 100755 --- a/emcc +++ b/emcc @@ -1359,10 +1359,11 @@ try: return ''; membytes = ''.join(map(chr, membytes)) if not memory_init_file: - s = repr(membytes) - hex_to_octal = lambda x: '\\%o' % int(x.group(1), 16) - s = re.sub(r'\\x([0-1][0-9A-Fa-f])(?:(?=[^0-9])|$)', hex_to_octal, s) - return 'var memoryInitializer = %s;' % s + s = membytes + s = s.replace('\\', '\\\\').replace("'", "\\'") + s = s.replace('\n', '\\n').replace('\r', '\\r') + s = s.decode('latin1').encode('utf8') + return "var memoryInitializer = '%s';" % s open(memfile, 'wb').write(membytes) if DEBUG: # Copy into temp dir as well, so can be run there too From 6d3b9ff87a8d550862dfb4dcdd82b0c276f9f244 Mon Sep 17 00:00:00 2001 From: Martin von Gagern Date: Mon, 8 Jun 2015 22:48:02 +0200 Subject: [PATCH 24/32] Verify integrity of memory initializer using CRC-32 This is almost the standard CRC-32 algorithm, except that we omit the final XOR with -1 so that we can easily compare the result against zero. The length of the initializer is included in the data so that we don't have to worry about leading zeros (after XOR with the init value of -1). 
Useful read: http://www.ross.net/crc/download/crc_v3.txt --- emcc | 20 +++++++++++++++++--- src/postamble.js | 18 +++++++++++++++++- tests/test_other.py | 14 ++++++++++++++ 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/emcc b/emcc index c3cb050e336ba..cc89065396806 100755 --- a/emcc +++ b/emcc @@ -1357,14 +1357,28 @@ try: membytes.pop() if not membytes: return ''; - membytes = ''.join(map(chr, membytes)) if not memory_init_file: - s = membytes + crcTable = [] + for i in range(256): + crc = i + for bit in range(8): + crc = (crc >> 1) ^ ((crc & 1) * 0xedb88320) + crcTable.append(crc) + crc = 0xffffffff + s = list(membytes) + n = len(s) + crc = crcTable[(crc ^ n) & 0xff] ^ (crc >> 8) + crc = crcTable[(crc ^ (n >> 8)) & 0xff] ^ (crc >> 8) + for i in s: + crc = crcTable[(crc ^ i) & 0xff] ^ (crc >> 8) + for i in range(4): + s.append((crc >> (8 * i)) & 0xff) + s = ''.join(map(chr, s)) s = s.replace('\\', '\\\\').replace("'", "\\'") s = s.replace('\n', '\\n').replace('\r', '\\r') s = s.decode('latin1').encode('utf8') return "var memoryInitializer = '%s';" % s - open(memfile, 'wb').write(membytes) + open(memfile, 'wb').write(''.join(map(chr, membytes))) if DEBUG: # Copy into temp dir as well, so can be run there too shared.safe_copy(memfile, os.path.join(shared.get_emscripten_temp_dir(), os.path.basename(memfile))) diff --git a/src/postamble.js b/src/postamble.js index 2571cb24a403b..e64710d6701dc 100644 --- a/src/postamble.js +++ b/src/postamble.js @@ -7,7 +7,23 @@ if (memoryInitializer && !ENVIRONMENT_IS_PTHREAD) (function(s) { #else if (memoryInitializer) (function(s) { #endif - for (var i = 0; i < s.length; ++i) HEAPU8[STATIC_BASE + i] = s.charCodeAt(i); + var i, n = s.length - 4; +#if ASSERTIONS + var crc, bit, table = new Int32Array(256); + for (i = 0; i < 256; ++i) { + for (crc = i, bit = 0; bit < 8; ++bit) + crc = (crc >>> 1) ^ ((crc & 1) * 0xedb88320); + table[i] = crc >>> 0; + } + crc = -1; + crc = table[(crc ^ n) & 0xff] ^ (crc >>> 8); + crc = 
table[(crc ^ (n >>> 8)) & 0xff] ^ (crc >>> 8); + for (i = 0; i < s.length; ++i) + crc = table[(crc ^ s.charCodeAt(i)) & 0xff] ^ (crc >>> 8); + assert(crc === 0, "memory initializer checksum"); +#endif + for (i = 0; i < n; ++i) + HEAPU8[STATIC_BASE + i] = s.charCodeAt(i); })(memoryInitializer); #else #if MEM_INIT_METHOD == 1 diff --git a/tests/test_other.py b/tests/test_other.py index 28f949eeb7fc9..9ac33f0aacb53 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -4879,3 +4879,17 @@ def test_debug_asmLastOpts(self): out, err = Popen([PYTHON, EMCC, 'src.c', '-s', 'EXPORTED_FUNCTIONS=["_main", "_treecount"]', '--minify', '0', '-g4', '-Oz']).communicate() self.assertContained('hello, world!', run_js('a.out.js')) + def test_meminit_crc(self): + with open('src.c', 'w') as f: + f.write(r''' +#include +int main() { printf("Mary had a little lamb.\n"); } +''') + out, err = Popen([PYTHON, EMCC, 'src.c', '-O2', '--memory-init-file', '0', '-s', 'MEM_INIT_METHOD=2', '-s', 'ASSERTIONS=1']).communicate() + with open('a.out.js', 'r') as f: + d = f.read() + d = d.replace('Mary had', 'Paul had') + with open('a.out.js', 'w') as f: + f.write(d) + out = run_js('a.out.js', assert_returncode=None, stderr=subprocess.STDOUT) + self.assertContained('Assertion failed: memory initializer checksum', out) From e2d59af99dcb1458b0d4f2c41bb4ad50253c4c92 Mon Sep 17 00:00:00 2001 From: Martin von Gagern Date: Mon, 8 Jun 2015 23:02:23 +0200 Subject: [PATCH 25/32] Use hex escapes for non-ASCII bytes This makes the resulting literals more independent from the character encoding the environment assumes for the resulting file. It requires slightly more memory, but large bytes are far less common than small bytes (zero in particular), so the cost should not be too much. If we want to, we can still make this optional later on. 
--- emcc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/emcc b/emcc index cc89065396806..a63543a202bd8 100755 --- a/emcc +++ b/emcc @@ -1376,7 +1376,8 @@ try: s = ''.join(map(chr, s)) s = s.replace('\\', '\\\\').replace("'", "\\'") s = s.replace('\n', '\\n').replace('\r', '\\r') - s = s.decode('latin1').encode('utf8') + def escape(x): return '\\x{:02x}'.format(ord(x.group())) + s = re.sub('[\x80-\xff]', escape, s) return "var memoryInitializer = '%s';" % s open(memfile, 'wb').write(''.join(map(chr, membytes))) if DEBUG: From f5bc4226bf4f54422951ddf2e69eda2e0f1631bc Mon Sep 17 00:00:00 2001 From: Martin von Gagern Date: Thu, 11 Jun 2015 00:27:55 +0200 Subject: [PATCH 26/32] Incorporate suggestions from code review * MEM_INIT_METHOD == 1 with --memory-init-file 0 now triggers an assertion * Consistently return '' instead of m.group(0) if there is no initializer * Strip trailing zeros for emterpreter as well * Include crc32 in literal only if it gets verified * Enable assertions for the asm2m test run in general * Disable assertions for one test case, fnmatch, to cover that as well * Include the asm2m run name in two lists of run modes * Add browser test to verify all pairs of bytes get encoded correctly * Add browser test to verify that a >32M initializer works without chunking * Omit duplicate var declaration for the memoryInitializer variable * Minor comments and syntax improvements * Capture the memory_init_file setting by its MEM_INIT_METHOD value. * Drop special handling for emterpreter, which shouldn't be needed any more. 
--- emcc | 53 +++++++++++++++++++------------------ src/postamble.js | 9 ++++--- tests/meminit_pairs.c | 18 +++++++++++++ tests/parallel_test_core.py | 2 +- tests/runner.py | 2 +- tests/test_browser.py | 23 ++++++++++++++++ tests/test_core.py | 9 ++++++- 7 files changed, 84 insertions(+), 32 deletions(-) create mode 100644 tests/meminit_pairs.c diff --git a/emcc b/emcc index a63543a202bd8..74b8905349dbf 100755 --- a/emcc +++ b/emcc @@ -1291,8 +1291,8 @@ try: extra_args = [] if not js_libraries else ['--libraries', ','.join(map(os.path.abspath, js_libraries))] if memory_init_file: shared.Settings.MEM_INIT_METHOD = 1 - elif shared.Settings.MEM_INIT_METHOD == 1: - shared.Settings.MEM_INIT_METHOD = 0 + else: + assert shared.Settings.MEM_INIT_METHOD != 1 final = shared.Building.emscripten(final, append_ext=False, extra_args=extra_args) if DEBUG: save_intermediate('original') @@ -1344,46 +1344,47 @@ try: js_transform_tempfiles = [final] - if memory_init_file or shared.Settings.MEM_INIT_METHOD == 2: + if shared.Settings.MEM_INIT_METHOD > 0: memfile = target + '.mem' shared.try_delete(memfile) def repl(m): # handle chunking of the memory initializer - s = m.groups(0)[0] - if len(s) == 0 and not shared.Settings.EMTERPRETIFY: return m.group(0) # emterpreter must have a mem init file; otherwise, don't emit 0-size ones + s = m.group(1) + if len(s) == 0: return '' # don't emit 0-size ones membytes = [int(x or '0') for x in s.split(',')] - if not shared.Settings.EMTERPRETIFY: - while membytes and membytes[-1] == 0: - membytes.pop() - if not membytes: - return ''; + while membytes and membytes[-1] == 0: + membytes.pop() + if not membytes: return '' if not memory_init_file: - crcTable = [] - for i in range(256): - crc = i - for bit in range(8): - crc = (crc >> 1) ^ ((crc & 1) * 0xedb88320) - crcTable.append(crc) - crc = 0xffffffff + # memory initializer in a string literal s = list(membytes) - n = len(s) - crc = crcTable[(crc ^ n) & 0xff] ^ (crc >> 8) - crc = crcTable[(crc ^ (n >> 
8)) & 0xff] ^ (crc >> 8) - for i in s: - crc = crcTable[(crc ^ i) & 0xff] ^ (crc >> 8) - for i in range(4): - s.append((crc >> (8 * i)) & 0xff) + if shared.Settings.ASSERTIONS: + # append checksum of length and content + crcTable = [] + for i in range(256): + crc = i + for bit in range(8): + crc = (crc >> 1) ^ ((crc & 1) * 0xedb88320) + crcTable.append(crc) + crc = 0xffffffff + n = len(s) + crc = crcTable[(crc ^ n) & 0xff] ^ (crc >> 8) + crc = crcTable[(crc ^ (n >> 8)) & 0xff] ^ (crc >> 8) + for i in s: + crc = crcTable[(crc ^ i) & 0xff] ^ (crc >> 8) + for i in range(4): + s.append((crc >> (8 * i)) & 0xff) s = ''.join(map(chr, s)) s = s.replace('\\', '\\\\').replace("'", "\\'") s = s.replace('\n', '\\n').replace('\r', '\\r') def escape(x): return '\\x{:02x}'.format(ord(x.group())) s = re.sub('[\x80-\xff]', escape, s) - return "var memoryInitializer = '%s';" % s + return "memoryInitializer = '%s';" % s open(memfile, 'wb').write(''.join(map(chr, membytes))) if DEBUG: # Copy into temp dir as well, so can be run there too shared.safe_copy(memfile, os.path.join(shared.get_emscripten_temp_dir(), os.path.basename(memfile))) - return 'var memoryInitializer = "%s";' % os.path.basename(memfile) + return 'memoryInitializer = "%s";' % os.path.basename(memfile) src = re.sub(shared.JS.memory_initializer_pattern, repl, open(final).read(), count=1) open(final + '.mem.js', 'w').write(src) final += '.mem.js' diff --git a/src/postamble.js b/src/postamble.js index e64710d6701dc..989c4a5a508be 100644 --- a/src/postamble.js +++ b/src/postamble.js @@ -7,8 +7,9 @@ if (memoryInitializer && !ENVIRONMENT_IS_PTHREAD) (function(s) { #else if (memoryInitializer) (function(s) { #endif - var i, n = s.length - 4; + var i, n = s.length; #if ASSERTIONS + n -= 4; var crc, bit, table = new Int32Array(256); for (i = 0; i < 256; ++i) { for (crc = i, bit = 0; bit < 8; ++bit) @@ -18,12 +19,14 @@ if (memoryInitializer) (function(s) { crc = -1; crc = table[(crc ^ n) & 0xff] ^ (crc >>> 8); crc = table[(crc ^ 
(n >>> 8)) & 0xff] ^ (crc >>> 8); - for (i = 0; i < s.length; ++i) + for (i = 0; i < s.length; ++i) { crc = table[(crc ^ s.charCodeAt(i)) & 0xff] ^ (crc >>> 8); + } assert(crc === 0, "memory initializer checksum"); #endif - for (i = 0; i < n; ++i) + for (i = 0; i < n; ++i) { HEAPU8[STATIC_BASE + i] = s.charCodeAt(i); + } })(memoryInitializer); #else #if MEM_INIT_METHOD == 1 diff --git a/tests/meminit_pairs.c b/tests/meminit_pairs.c new file mode 100644 index 0000000000000..32f087c571369 --- /dev/null +++ b/tests/meminit_pairs.c @@ -0,0 +1,18 @@ +unsigned char problematic[] = { 0x20, 0x7c, 0x02, 0x07, 0x5f, 0xa0, 0xdf }; +int main() { + unsigned char a, b; + int result = 0, i, j; + for (i = 0; i < sizeof(problematic); ++i) { + a = problematic[i] ^ 32; + for (j = 0; j < sizeof(problematic); ++j) { + b = problematic[j] ^ 32; + if (((const unsigned char)data[a][2*b]) != a || + ((const unsigned char)data[a][2*b + 1]) != b) { + result = 1; + printf("data[0x%02x][0x%03x]=%x02x\n", a, 2*b, data[a][2*b]); + printf("data[0x%02x][0x%03x]=%x02x\n", a, 2*b + 1, data[a][2*b + 1]); + } + } + } + REPORT_RESULT() +} diff --git a/tests/parallel_test_core.py b/tests/parallel_test_core.py index 94e0ed26de1d6..98aca0450f5d2 100755 --- a/tests/parallel_test_core.py +++ b/tests/parallel_test_core.py @@ -14,7 +14,7 @@ assert not os.environ.get('EM_SAVE_DIR'), 'Need separate directories to avoid the parallel tests clashing' # run slower ones first, to optimize total time -optimal_order = ['asm3i', 'asm1i', 'asm2nn', 'asm3', 'asm2', 'asm2g', 'asm2f', 'asm1', 'default'] +optimal_order = ['asm3i', 'asm1i', 'asm2nn', 'asm3', 'asm2', 'asm2m', 'asm2g', 'asm2f', 'asm1', 'default'] assert set(optimal_order) == set(test_modes), 'need to update the list of slowest modes' # set up a background thread to report progress diff --git a/tests/runner.py b/tests/runner.py index a7c6428ba0a6e..ea5d7747d8003 100755 --- a/tests/runner.py +++ b/tests/runner.py @@ -38,7 +38,7 @@ def path_from_root(*pathelems): # 
Core test runner class, shared between normal tests and benchmarks checked_sanity = False -test_modes = ['default', 'asm1', 'asm2', 'asm3', 'asm2f', 'asm2g', 'asm1i', 'asm3i', 'asm2nn'] +test_modes = ['default', 'asm1', 'asm2', 'asm3', 'asm2f', 'asm2g', 'asm1i', 'asm3i', 'asm2m', 'asm2nn'] test_index = 0 use_all_engines = os.environ.get('EM_ALL_ENGINES') # generally js engines are equivalent, testing 1 is enough. set this diff --git a/tests/test_browser.py b/tests/test_browser.py index d38771e55d85c..05af5a12bc70d 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -2637,3 +2637,26 @@ def test_pthread_file_io(self): # Test that it is possible to send a signal via calling alarm(timeout), which in turn calls to the signal handler set by signal(SIGALRM, func); def test_sigalrm(self): self.btest(path_from_root('tests', 'sigalrm.cpp'), expected='0', args=['-O3']) + + def test_meminit_pairs(self): + d = 'const char *data[] = {\n "' + d += '",\n "'.join(''.join('\\x{:02x}\\x{:02x}'.format(i, j) + for j in range(256)) for i in range(256)) + with open(path_from_root('tests', 'meminit_pairs.c')) as f: + d += '"\n};\n' + f.read() + args = ["-O2", "--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2", "-s", "ASSERTIONS=1"] + self.btest(d, expected='0', args=args + ["--closure", "0"]) + self.btest(d, expected='0', args=args + ["--closure", "0", "-g"]) + self.btest(d, expected='0', args=args + ["--closure", "1"]) + + def test_meminit_big(self): + d = 'const char *data[] = {\n "' + d += '",\n "'.join([''.join('\\x{:02x}\\x{:02x}'.format(i, j) + for j in range(256)) for i in range(256)]*256) + with open(path_from_root('tests', 'meminit_pairs.c')) as f: + d += '"\n};\n' + f.read() + assert len(d) > (1 << 27) # more than 32M memory initializer + args = ["-O2", "--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2", "-s", "ASSERTIONS=1"] + self.btest(d, expected='0', args=args + ["--closure", "0"]) + self.btest(d, expected='0', args=args + ["--closure", "0", "-g"]) + 
self.btest(d, expected='0', args=args + ["--closure", "1"]) diff --git a/tests/test_core.py b/tests/test_core.py index 5d6b7792c37b4..33e7a093bc2f7 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -4374,6 +4374,13 @@ def test_strstr(self): self.do_run_from_file(src, output) def test_fnmatch(self): + # Run one test without assertions, for additional coverage + assert 'asm2m' in test_modes + if self.run_name == 'asm2m': + i = self.emcc_args.index('ASSERTIONS=1') + assert i > 0 and self.emcc_args[i-1] == '-s' + self.emcc_args[i] = 'ASSERTIONS=0' + test_path = path_from_root('tests', 'core', 'fnmatch') src, output = (test_path + s for s in ('.c', '.out')) self.do_run_from_file(src, output) @@ -7387,7 +7394,7 @@ def setUp(self): asm2g = make_run("asm2g", compiler=CLANG, emcc_args=["-O2", "-g", "-s", "ASSERTIONS=1", "-s", "SAFE_HEAP=1"]) asm1i = make_run("asm1i", compiler=CLANG, emcc_args=["-O1", '-s', 'EMTERPRETIFY=1']) asm3i = make_run("asm3i", compiler=CLANG, emcc_args=["-O3", '-s', 'EMTERPRETIFY=1']) -asm2m = make_run("asm2m", compiler=CLANG, emcc_args=["-O2", "--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2"]) +asm2m = make_run("asm2m", compiler=CLANG, emcc_args=["-O2", "--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2", "-s", "ASSERTIONS=1"]) # Legacy test modes - asm2nn = make_run("asm2nn", compiler=CLANG, emcc_args=["-O2"], env={"EMCC_NATIVE_OPTIMIZER": "0"}) From 9a1e23ee20fcc1c6589cc8b5bae207eeb4c6b8a9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 15 Jun 2015 16:33:39 -0700 Subject: [PATCH 27/32] fix interactive.test_freealut --- tests/test_interactive.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/test_interactive.py b/tests/test_interactive.py index 13a6a7ea2529f..98e176f6c6c17 100644 --- a/tests/test_interactive.py +++ b/tests/test_interactive.py @@ -110,14 +110,11 @@ def get_freealut_library(self): if WINDOWS and Building.which('cmake'): return self.get_library('freealut', 
os.path.join('hello_world.bc'), configure=['cmake', '.'], configure_args=['-DBUILD_TESTS=ON']) else: - return self.get_library('freealut', os.path.join('examples', 'hello_world.bc'), make_args=['EXEEXT=.bc']) + return self.get_library('freealut', [os.path.join('examples', '.libs', 'hello_world.bc'), os.path.join('src', '.libs', 'libalut.a')], make_args=['EXEEXT=.bc']) def test_freealut(self): - programs = self.get_freealut_library() - for program in programs: - assert os.path.exists(program) - Popen([PYTHON, EMCC, '-O2', program, '-o', 'page.html']).communicate() - self.run_browser('page.html', 'You should hear "Hello World!"') + Popen([PYTHON, EMCC, '-O2'] + self.get_freealut_library() + ['-o', 'page.html']).communicate() + self.run_browser('page.html', 'You should hear "Hello World!"') def test_vr(self): self.btest(path_from_root('tests', 'test_vr.c'), expected='0') From f0ed1b6b335f55c6903d078268cc72ef92007ad5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 15 Jun 2015 17:23:24 -0700 Subject: [PATCH 28/32] fix browser.test_emrun after pthreads merge broke it in 7cff83670de4c021633473d4ce539daff930195f --- src/emrun_postjs.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/emrun_postjs.js b/src/emrun_postjs.js index bb253aa445d67..63da3f8ece0a5 100644 --- a/src/emrun_postjs.js +++ b/src/emrun_postjs.js @@ -1,4 +1,4 @@ -if (typeof window === "object" && !ENVIRONMENT_IS_PTHREAD) { +if (typeof window === "object" && (typeof ENVIRONMENT_IS_PTHREAD === 'undefined' || !ENVIRONMENT_IS_PTHREAD)) { function emrun_register_handlers() { function post(msg) { var http = new XMLHttpRequest(); @@ -19,6 +19,7 @@ if (typeof window === "object" && !ENVIRONMENT_IS_PTHREAD) { post('^pageload^'); } } + window.addEventListener('load', emrun_register_handlers); } // POSTs the given binary data represented as a (typed) array data back to the emrun-based web server. 
From 4c46de34359b8cb2c67e2c8c4b4c688ad5605744 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 15 Jun 2015 15:27:16 -0700 Subject: [PATCH 29/32] comment --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index 5141f69ae54ad..7f22606f17969 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -4385,7 +4385,7 @@ def test_fnmatch(self): i = self.emcc_args.index('ASSERTIONS=1') assert i > 0 and self.emcc_args[i-1] == '-s' self.emcc_args[i] = 'ASSERTIONS=0' - + print 'flip assertions off' test_path = path_from_root('tests', 'core', 'fnmatch') src, output = (test_path + s for s in ('.c', '.out')) self.do_run_from_file(src, output) From 4e9da1053d8128df5ced222daa01fed8d00fd188 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 15 Jun 2015 15:27:37 -0700 Subject: [PATCH 30/32] refactor string memory initializer into tools/shared.py --- emcc | 24 +----------------------- tools/shared.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/emcc b/emcc index 57110025a7b5f..7255407420137 100755 --- a/emcc +++ b/emcc @@ -1355,29 +1355,7 @@ try: if not membytes: return '' if not memory_init_file: # memory initializer in a string literal - s = list(membytes) - if shared.Settings.ASSERTIONS: - # append checksum of length and content - crcTable = [] - for i in range(256): - crc = i - for bit in range(8): - crc = (crc >> 1) ^ ((crc & 1) * 0xedb88320) - crcTable.append(crc) - crc = 0xffffffff - n = len(s) - crc = crcTable[(crc ^ n) & 0xff] ^ (crc >> 8) - crc = crcTable[(crc ^ (n >> 8)) & 0xff] ^ (crc >> 8) - for i in s: - crc = crcTable[(crc ^ i) & 0xff] ^ (crc >> 8) - for i in range(4): - s.append((crc >> (8 * i)) & 0xff) - s = ''.join(map(chr, s)) - s = s.replace('\\', '\\\\').replace("'", "\\'") - s = s.replace('\n', '\\n').replace('\r', '\\r') - def escape(x): return '\\x{:02x}'.format(ord(x.group())) - s = re.sub('[\x80-\xff]', escape, s) - return 
"memoryInitializer = '%s';" % s + return "memoryInitializer = '%s';" % shared.JS.generate_string_initializer(list(membytes)) open(memfile, 'wb').write(''.join(map(chr, membytes))) if DEBUG: # Copy into temp dir as well, so can be run there too diff --git a/tools/shared.py b/tools/shared.py index 0ed58048437d4..0c1bcc4cd8ac0 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -1810,6 +1810,30 @@ def optimize_initializer(src): if len(contents) <= JS.INITIALIZER_CHUNK_SIZE: return None return JS.replace_initializers(src, JS.split_initializer(contents)) + @staticmethod + def generate_string_initializer(s): + if Settings.ASSERTIONS: + # append checksum of length and content + crcTable = [] + for i in range(256): + crc = i + for bit in range(8): + crc = (crc >> 1) ^ ((crc & 1) * 0xedb88320) + crcTable.append(crc) + crc = 0xffffffff + n = len(s) + crc = crcTable[(crc ^ n) & 0xff] ^ (crc >> 8) + crc = crcTable[(crc ^ (n >> 8)) & 0xff] ^ (crc >> 8) + for i in s: + crc = crcTable[(crc ^ i) & 0xff] ^ (crc >> 8) + for i in range(4): + s.append((crc >> (8 * i)) & 0xff) + s = ''.join(map(chr, s)) + s = s.replace('\\', '\\\\').replace("'", "\\'") + s = s.replace('\n', '\\n').replace('\r', '\\r') + def escape(x): return '\\x{:02x}'.format(ord(x.group())) + return re.sub('[\x80-\xff]', escape, s) + # Compression of code and data for smaller downloads class Compression: on = False From 549953a86733117950779231c16d0559ecbecc53 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 16 Jun 2015 12:44:06 -0700 Subject: [PATCH 31/32] add fuzzing for assertions and MEM_INIT_METHOD == 2 --- tests/fuzz/csmith_driver.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/fuzz/csmith_driver.py b/tests/fuzz/csmith_driver.py index 46f6ac53f0887..84214258f1f1b 100755 --- a/tests/fuzz/csmith_driver.py +++ b/tests/fuzz/csmith_driver.py @@ -118,6 +118,10 @@ def try_js(args=[]): js_args += ['-s', 'EMTERPRETIFY_WHITELIST=["_main"]'] # the opposite direction if random.random() < 0.5: js_args 
+= ['-s', 'EMTERPRETIFY_ASYNC=1'] + if random.random() < 0.5: + js_args += ["--memory-init-file", "0", "-s", "MEM_INIT_METHOD=2"] + if random.random() < 0.5: + js_args += ['-s', 'ASSERTIONS=1'] print '(compile)', ' '.join(js_args) open(fullname, 'a').write('\n// ' + ' '.join(js_args) + '\n\n') try: From 667dcd241886fdb878e248d95d8e03abb09c80b7 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 16 Jun 2015 12:44:26 -0700 Subject: [PATCH 32/32] 1.34.0 --- emscripten-version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emscripten-version.txt b/emscripten-version.txt index fa911e3b71b7c..1368c976c81f7 100644 --- a/emscripten-version.txt +++ b/emscripten-version.txt @@ -1,2 +1,2 @@ -1.33.2 +1.34.0