From fa97850c2b3402b15cb34a4f7482a820880a98da Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 4 Sep 2015 10:48:19 -0700 Subject: [PATCH 01/64] fix a race condition in worker startup #3741 --- src/postamble.js | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/postamble.js b/src/postamble.js index 28cb673195bb5..80172de8a085b 100644 --- a/src/postamble.js +++ b/src/postamble.js @@ -346,21 +346,24 @@ run(); var workerResponded = false, workerCallbackId = -1; (function() { - var messageBuffer = null; + var messageBuffer = null, buffer = 0, bufferSize = 0; - function messageResender() { + function flushMessages() { + if (!messageBuffer) return; if (runtimeInitialized) { - assert(messageBuffer && messageBuffer.length > 0); messageBuffer.forEach(function(message) { onmessage(message); }); messageBuffer = null; - } else { - setTimeout(messageResender, 100); } } - var buffer = 0, bufferSize = 0; + function messageResender() { + flushMessages(); + if (messageBuffer) { + setTimeout(messageResender, 100); // still more to do + } + } onmessage = function onmessage(msg) { // if main has not yet been called (mem init file, other async things), buffer messages @@ -372,6 +375,7 @@ var workerResponded = false, workerCallbackId = -1; messageBuffer.push(msg); return; } + flushMessages(); var func = Module['_' + msg.data['funcName']]; if (!func) throw 'invalid worker function to call: ' + msg.data['funcName']; From f9bbc398a54acb1cc6ac0f4e6133bbb3784cffd3 Mon Sep 17 00:00:00 2001 From: Aidan Hobson Sayers Date: Thu, 22 Jan 2015 00:02:05 +0000 Subject: [PATCH 02/64] Attempt to clarify INCLUDE_FULL_LIBRARY --- src/settings.js | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/settings.js b/src/settings.js index b1c59c0bbeabb..06029f3c59688 100644 --- a/src/settings.js +++ b/src/settings.js @@ -437,14 +437,16 @@ var EXPORTED_GLOBALS = []; // Global non-function variables that are explicitly // exported, so they 
are guaranteed to be // accessible outside of the generated code. -var INCLUDE_FULL_LIBRARY = 0; // Whether to include the whole library rather than just the - // functions used by the generated code. This is needed when - // dynamically loading modules that make use of runtime +var INCLUDE_FULL_LIBRARY = 0; // Include all JS library functions instead of the sum of + // DEFAULT_LIBRARY_FUNCS_TO_INCLUDE + any functions used + // by the generated code. This is needed when dynamically + // loading (i.e. dlopen) modules that make use of runtime // library functions that are not used in the main module. - // Note that this includes js libraries but *not* C. You will - // need the main file to include all needed C libraries. For - // example, if a library uses malloc or new, you will need - // to use those in the main file too to link in dlmalloc. + // Note that this only applies to js libraries, *not* C. You + // will need the main file to include all needed C libraries. + // For example, if a module uses malloc or new, you will + // need to use those in the main file too to pull in dlmalloc + // for use by the module. var SHELL_FILE = 0; // set this to a string to override the shell file used From b9c9093f0d0b31d27a2ff79b3bfef24d78a490ad Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sat, 5 Sep 2015 17:04:06 -0700 Subject: [PATCH 03/64] mini-lz4.js --- tools/mini-lz4.js | 274 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 274 insertions(+) create mode 100644 tools/mini-lz4.js diff --git a/tools/mini-lz4.js b/tools/mini-lz4.js new file mode 100644 index 0000000000000..36581a21d82b6 --- /dev/null +++ b/tools/mini-lz4.js @@ -0,0 +1,274 @@ +/* +MiniLZ4: Minimal LZ4 block decoding and encoding. 
+ +based off of node-lz4, https://github.com/pierrec/node-lz4 + +==== +Copyright (c) 2012 Pierre Curto + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +==== + +changes have the same license +*/ + +var MiniLZ4 = (function() { + +var exports = {}; + +/** + * Decode a block. Assumptions: input contains all sequences of a + * chunk, output is large enough to receive the decoded data. + * If the output buffer is too small, an error will be thrown. + * If the returned value is negative, an error occured at the returned offset. 
+ * + * @param input {Buffer} input data + * @param output {Buffer} output data + * @return {Number} number of decoded bytes + * @private + */ +exports.uncompress = function (input, output, sIdx, eIdx) { + sIdx = sIdx || 0 + eIdx = eIdx || (input.length - sIdx) + // Process each sequence in the incoming data + for (var i = sIdx, n = eIdx, j = 0; i < n;) { + var token = input[i++] + + // Literals + var literals_length = (token >> 4) + if (literals_length > 0) { + // length of literals + var l = literals_length + 240 + while (l === 255) { + l = input[i++] + literals_length += l + } + + // Copy the literals + var end = i + literals_length + while (i < end) output[j++] = input[i++] + + // End of buffer? + if (i === n) return j + } + + // Match copy + // 2 bytes offset (little endian) + var offset = input[i++] | (input[i++] << 8) + + // 0 is an invalid offset value + if (offset === 0 || offset > j) return -(i-2) + + // length of match copy + var match_length = (token & 0xf) + var l = match_length + 240 + while (l === 255) { + l = input[i++] + match_length += l + } + + // Copy the match + var pos = j - offset // position of the match copy in the current output + var end = j + match_length + 4 // minmatch = 4 + while (j < end) output[j++] = output[pos++] + } + + return j +} + +var + maxInputSize = 0x7E000000 +, minMatch = 4 +// uint32() optimization +, hashLog = 16 +, hashShift = (minMatch * 8) - hashLog +, hashSize = 1 << hashLog + +, copyLength = 8 +, lastLiterals = 5 +, mfLimit = copyLength + minMatch +, skipStrength = 6 + +, mlBits = 4 +, mlMask = (1 << mlBits) - 1 +, runBits = 8 - mlBits +, runMask = (1 << runBits) - 1 + +, hasher = /* XXX uint32( */ 2654435761 /* ) */ + +// CompressBound returns the maximum length of a lz4 block, given it's uncompressed length +exports.compressBound = function (isize) { + return isize > maxInputSize + ? 
0 + : (isize + (isize/255) + 16) | 0 +} + +exports.compress = function (src, dst, sIdx, eIdx) { + // V8 optimization: non sparse array with integers + var hashTable = new Array(hashSize) + for (var i = 0; i < hashSize; i++) { + hashTable[i] = 0 + } + return compressBlock(src, dst, 0, hashTable, sIdx || 0, eIdx || dst.length) +} + +function compressBlock (src, dst, pos, hashTable, sIdx, eIdx) { + // XXX var Hash = uint32() // Reusable unsigned 32 bits integer + var dpos = sIdx + var dlen = eIdx - sIdx + var anchor = 0 + + if (src.length >= maxInputSize) throw new Error("input too large") + + // Minimum of input bytes for compression (LZ4 specs) + if (src.length > mfLimit) { + var n = exports.compressBound(src.length) + if ( dlen < n ) throw Error("output too small: " + dlen + " < " + n) + + var + step = 1 + , findMatchAttempts = (1 << skipStrength) + 3 + // Keep last few bytes incompressible (LZ4 specs): + // last 5 bytes must be literals + , srcLength = src.length - mfLimit + + while (pos + minMatch < srcLength) { + // Find a match + // min match of 4 bytes aka sequence + var sequenceLowBits = src[pos+1]<<8 | src[pos] + var sequenceHighBits = src[pos+3]<<8 | src[pos+2] + // compute hash for the current sequence + var hash = Math.imul(sequenceLowBits | (sequenceHighBits << 16), hasher) >>> hashShift; + /* XXX Hash.fromBits(sequenceLowBits, sequenceHighBits) + .multiply(hasher) + .shiftr(hashShift) + .toNumber() */ + // get the position of the sequence matching the hash + // NB. 
since 2 different sequences may have the same hash + // it is double-checked below + // do -1 to distinguish between initialized and uninitialized values + var ref = hashTable[hash] - 1 + // save position of current sequence in hash table + hashTable[hash] = pos + 1 + + // first reference or within 64k limit or current sequence !== hashed one: no match + if ( ref < 0 || + ((pos - ref) >>> 16) > 0 || + ( + ((src[ref+3]<<8 | src[ref+2]) != sequenceHighBits) || + ((src[ref+1]<<8 | src[ref]) != sequenceLowBits ) + ) + ) { + // increase step if nothing found within limit + step = findMatchAttempts++ >> skipStrength + pos += step + continue + } + + findMatchAttempts = (1 << skipStrength) + 3 + + // got a match + var literals_length = pos - anchor + var offset = pos - ref + + // minMatch already verified + pos += minMatch + ref += minMatch + + // move to the end of the match (>=minMatch) + var match_length = pos + while (pos < srcLength && src[pos] == src[ref]) { + pos++ + ref++ + } + + // match length + match_length = pos - match_length + + // token + var token = match_length < mlMask ? 
match_length : mlMask + + // encode literals length + if (literals_length >= runMask) { + // add match length to the token + dst[dpos++] = (runMask << mlBits) + token + for (var len = literals_length - runMask; len > 254; len -= 255) { + dst[dpos++] = 255 + } + dst[dpos++] = len + } else { + // add match length to the token + dst[dpos++] = (literals_length << mlBits) + token + } + + // write literals + for (var i = 0; i < literals_length; i++) { + dst[dpos++] = src[anchor+i] + } + + // encode offset + dst[dpos++] = offset + dst[dpos++] = (offset >> 8) + + // encode match length + if (match_length >= mlMask) { + match_length -= mlMask + while (match_length >= 255) { + match_length -= 255 + dst[dpos++] = 255 + } + + dst[dpos++] = match_length + } + + anchor = pos + } + } + + // cannot compress input + if (anchor == 0) return 0 + + // Write last literals + // encode literals length + literals_length = src.length - anchor + if (literals_length >= runMask) { + // add match length to the token + dst[dpos++] = (runMask << mlBits) + for (var ln = literals_length - runMask; ln > 254; ln -= 255) { + dst[dpos++] = 255 + } + dst[dpos++] = ln + } else { + // add match length to the token + dst[dpos++] = (literals_length << mlBits) + } + + // write literals + pos = anchor + while (pos < src.length) { + dst[dpos++] = src[pos++] + } + + return dpos +} + +return exports; + +})(); + From 3715c2ccae0e91c0b098584af410d52f1aa6522c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sat, 5 Sep 2015 17:25:36 -0700 Subject: [PATCH 04/64] fix return value of compress to be the total bytes decompressed --- tools/mini-lz4.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/mini-lz4.js b/tools/mini-lz4.js index 36581a21d82b6..ca7ec6b3975d3 100644 --- a/tools/mini-lz4.js +++ b/tools/mini-lz4.js @@ -72,8 +72,9 @@ exports.uncompress = function (input, output, sIdx, eIdx) { // 2 bytes offset (little endian) var offset = input[i++] | (input[i++] << 8) - // 0 is an invalid 
offset value - if (offset === 0 || offset > j) return -(i-2) + // XXX 0 is an invalid offset value + if (offset === 0) return j + if (offset > j) return -(i-2) // length of match copy var match_length = (token & 0xf) From 212a110bacdd681f08d2314b243f1b06c369e87f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sat, 5 Sep 2015 20:57:17 -0700 Subject: [PATCH 05/64] initial work on LZ4FS --- src/library_fs.js | 3 +- src/library_lz4fs.js | 200 +++++++++++++++++++++++++++++++++++++ {tools => src}/mini-lz4.js | 0 src/modules.js | 1 + 4 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 src/library_lz4fs.js rename {tools => src}/mini-lz4.js (100%) diff --git a/src/library_fs.js b/src/library_fs.js index 1f576c64b515d..61a8c65a861cb 100644 --- a/src/library_fs.js +++ b/src/library_fs.js @@ -1,5 +1,5 @@ mergeInto(LibraryManager.library, { - $FS__deps: ['$ERRNO_CODES', '$ERRNO_MESSAGES', '__setErrNo', '$PATH', '$TTY', '$MEMFS', '$IDBFS', '$NODEFS', '$WORKERFS', 'stdin', 'stdout', 'stderr'], + $FS__deps: ['$ERRNO_CODES', '$ERRNO_MESSAGES', '__setErrNo', '$PATH', '$TTY', '$MEMFS', '$IDBFS', '$NODEFS', '$WORKERFS', '$LZ4FS', 'stdin', 'stdout', 'stderr'], $FS__postset: 'FS.staticInit();' + '__ATINIT__.unshift(function() { if (!Module["noFSInit"] && !FS.init.initialized) FS.init() });' + '__ATMAIN__.push(function() { FS.ignorePermissions = false });' + @@ -1354,6 +1354,7 @@ mergeInto(LibraryManager.library, { 'IDBFS': IDBFS, 'NODEFS': NODEFS, 'WORKERFS': WORKERFS, + 'LZ4FS': LZ4FS, }; }, init: function(input, output, error) { diff --git a/src/library_lz4fs.js b/src/library_lz4fs.js new file mode 100644 index 0000000000000..ec67396cda4c5 --- /dev/null +++ b/src/library_lz4fs.js @@ -0,0 +1,200 @@ +// TODO: put behind a flag +mergeInto(LibraryManager.library, { + $LZ4FS__deps: ['$FS'], + $LZ4FS: { + DIR_MODE: {{{ cDefine('S_IFDIR') }}} | 511 /* 0777 */, + FILE_MODE: {{{ cDefine('S_IFREG') }}} | 511 /* 0777 */, + CHUNK_SIZE: 1024, + LZ4: null, + lastChunk: null, + 
lastChunkIndex: -1, + mount: function (mount) { + if (!LZ4FS.LZ4) { + LZ4FS.LZ4 = (function() { + {{{ read('mini-lz4.js') }}}; + return MiniLZ4; + })(); + LZ4FS.lastChunk = new Uint8Array(LZ4FS.CHUNK_SIZE); + } + var root = LZ4FS.createNode(null, '/', LZ4FS.DIR_MODE, 0); + var createdParents = {}; + function ensureParent(path) { + // return the parent node, creating subdirs as necessary + var parts = path.split('/'); + var parent = root; + for (var i = 0; i < parts.length-1; i++) { + var curr = parts.slice(0, i+1).join('/'); + if (!createdParents[curr]) { + createdParents[curr] = LZ4FS.createNode(parent, curr, LZ4FS.DIR_MODE, 0); + } + parent = createdParents[curr]; + } + return parent; + } + function base(path) { + var parts = path.split('/'); + return parts[parts.length-1]; + } + mount.opts["packages"].forEach(function(pack) { + // compress the data in chunks + console.log('compressing package'); + var data = pack['data']; + var compressedChunks = []; + var offset = 0; + var total = 0; + while (offset < data.length) { + var chunk = data.subarray(offset, offset + LZ4FS.CHUNK_SIZE); + offset += LZ4FS.CHUNK_SIZE; + var bound = LZ4FS.LZ4.compressBound(chunk.length); + var compressed = new Uint8Array(bound); + var compressedSize = LZ4FS.LZ4.compress(chunk, compressed); + assert(compressedSize > 0 && compressedSize <= bound); + compressed = compressed.subarray(0, compressedSize); + compressedChunks.push(compressed); + total += compressedSize; + } + data = null; // XXX null out pack['data'] too? 
+ var compressedData = { + data: new Uint8Array(total), + offsets: [], // chunk# => start in compressed data + sizes: [], + }; + offset = 0; + for (var i = 0; i < compressedChunks.length; i++) { + compressedData.data.set(compressedChunks[i], offset); + compressedData.offsets[i] = offset; + compressedData.sizes[i] = compressedChunks[i].length + offset += compressedChunks[i].length; + } + assert(offset === total); + compressedChunks.length = 0; + console.log('mounting package'); + pack['metadata'].files.forEach(function(file) { + var name = file.filename.substr(1); // remove initial slash + LZ4FS.createNode(ensureParent(name), base(name), LZ4FS.FILE_MODE, 0, { + compressedData: compressedData, + start: file.start, + end: file.end, + }); + }); + }); + return root; + }, + createNode: function (parent, name, mode, dev, contents, mtime) { + var node = FS.createNode(parent, name, mode); + node.mode = mode; + node.node_ops = LZ4FS.node_ops; + node.stream_ops = LZ4FS.stream_ops; + node.timestamp = (mtime || new Date).getTime(); + assert(LZ4FS.FILE_MODE !== LZ4FS.DIR_MODE); + if (mode === LZ4FS.FILE_MODE) { + node.size = contents.end - contents.start; + node.contents = contents; + } else { + node.size = 4096; + node.contents = {}; + } + if (parent) { + parent.contents[name] = node; + } + return node; + }, + node_ops: { + getattr: function(node) { + return { + dev: 1, + ino: undefined, + mode: node.mode, + nlink: 1, + uid: 0, + gid: 0, + rdev: undefined, + size: node.size, + atime: new Date(node.timestamp), + mtime: new Date(node.timestamp), + ctime: new Date(node.timestamp), + blksize: 4096, + blocks: Math.ceil(node.size / 4096), + }; + }, + setattr: function(node, attr) { + if (attr.mode !== undefined) { + node.mode = attr.mode; + } + if (attr.timestamp !== undefined) { + node.timestamp = attr.timestamp; + } + }, + lookup: function(parent, name) { + throw new FS.ErrnoError(ERRNO_CODES.ENOENT); + }, + mknod: function (parent, name, mode, dev) { + throw new 
FS.ErrnoError(ERRNO_CODES.EPERM); + }, + rename: function (oldNode, newDir, newName) { + throw new FS.ErrnoError(ERRNO_CODES.EPERM); + }, + unlink: function(parent, name) { + throw new FS.ErrnoError(ERRNO_CODES.EPERM); + }, + rmdir: function(parent, name) { + throw new FS.ErrnoError(ERRNO_CODES.EPERM); + }, + readdir: function(node) { + throw new FS.ErrnoError(ERRNO_CODES.EPERM); + }, + symlink: function(parent, newName, oldPath) { + throw new FS.ErrnoError(ERRNO_CODES.EPERM); + }, + readlink: function(node) { + throw new FS.ErrnoError(ERRNO_CODES.EPERM); + }, + }, + stream_ops: { + read: function (stream, buffer, offset, length, position) { + length = Math.min(length, stream.node.size - position); + if (length <= 0) return 0; + var contents = stream.node.contents; + var written = 0; + while (written < length) { + var start = contents.start + position + written; // start index in uncompressed data + var chunkIndex = Math.floor(start / LZ4FS.CHUNK_SIZE); + var compressedStart = contents.compressedData.offsets[chunkIndex]; + var compressedSize = contents.compressedData.sizes[chunkIndex]; + if (chunkIndex !== LZ4FS.lastChunkIndex) { + // decompress the chunk + console.log('decompressing chunk ' + chunkIndex); + var compressed = compressedData.data.subarray(compressedStart, compressedStart + compressedSize); + var originalSize = LZ4FS.LZ4.uncompress(compressed, LZ4FS.lastChunk); + assert(originalSize === LZ4FS.CHUNK_SIZE); + LZ4FS.lastChunkIndex = chunkIndex; + } + var startInChunk = start % LZ4FS.CHUNK_SIZE; + var desired = length - written; + var endInChunk = Math.min(startInChunk + desired, LZ4FS.CHUNK_SIZE); + buffer.set(LZ4FS.lastChunk.subarray(startInChunk, endInChunk), offset + written); + var currWritten = endInChunk - startInChunk; + written += currWritten; + } + return written; + }, + write: function (stream, buffer, offset, length, position) { + throw new FS.ErrnoError(ERRNO_CODES.EIO); + }, + llseek: function (stream, offset, whence) { + var position = 
offset; + if (whence === 1) { // SEEK_CUR. + position += stream.position; + } else if (whence === 2) { // SEEK_END. + if (FS.isFile(stream.node.mode)) { + position += stream.node.size; + } + } + if (position < 0) { + throw new FS.ErrnoError(ERRNO_CODES.EINVAL); + } + return position; + }, + }, + }, +}); diff --git a/tools/mini-lz4.js b/src/mini-lz4.js similarity index 100% rename from tools/mini-lz4.js rename to src/mini-lz4.js diff --git a/src/modules.js b/src/modules.js index e5c85b9e50850..0f376a260c4f7 100644 --- a/src/modules.js +++ b/src/modules.js @@ -110,6 +110,7 @@ var LibraryManager = { 'library_nodefs.js', 'library_sockfs.js', 'library_workerfs.js', + 'library_lz4fs.js', 'library_tty.js' ]); } From ced2e3a4588b7725ac26f72be9134eefcd79d1fa Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sat, 5 Sep 2015 21:37:43 -0700 Subject: [PATCH 06/64] initial work on an LZ4FS test --- tests/fs/test_lz4fs.cpp | 108 ++++++++++++++++++++++++++++++++++++++++ tests/test_browser.py | 7 +++ 2 files changed, 115 insertions(+) create mode 100644 tests/fs/test_lz4fs.cpp diff --git a/tests/fs/test_lz4fs.cpp b/tests/fs/test_lz4fs.cpp new file mode 100644 index 0000000000000..c789ec357d5e4 --- /dev/null +++ b/tests/fs/test_lz4fs.cpp @@ -0,0 +1,108 @@ +#include +#include +#include +#include + +#include + +double before_it_all; + +extern "C" { + +void EMSCRIPTEN_KEEPALIVE finish() { + // load some file data, SYNCHRONOUSLY :) + char buffer[100]; + int num; + + printf("load files\n"); + FILE *f1 = fopen("files/file1.txt", "r"); + assert(f1); + FILE *f2 = fopen("files/file2.txt", "r"); + assert(f2); + FILE *f3 = fopen("files/file3.txt", "r"); + assert(f3); + FILE *files[] = { f1, f2, f3 }; + double before = emscripten_get_now(); + int counter = 0; + for (int i = 0; i < 10*1024*1024 - 10; i += 100*1024) { + i += random() % 10; + int which = i % 3; + FILE *f = files[which]; + printf("%d read %d: %d (%d)\n", counter, which, i, i % 10); + int off = i % 10; + int ret = fseek(f, i, 
SEEK_SET); + assert(ret == 0); + num = fread(buffer, 1, 5, f); + if (num != 5) { + printf("%d read %d: %d failed num\n", counter, which, i); + abort(); + } + buffer[5] = 0; + char correct[] = "01234567890123456789"; + if (strncmp(buffer, correct + off, 5) != 0) { + printf("%d read %d: %d (%d) failed data\n", counter, which, i, i % 10); + abort(); + } + counter++; + } + double after = emscripten_get_now(); + fclose(f1); + fclose(f2); + fclose(f3); + printf("read IO time: %f\n", after - before); + + printf("total time: %f\n", after - before_it_all); + + // all done + printf("success\n"); + int result = 1; + REPORT_RESULT(); +} + +} + +int main() { + before_it_all = emscripten_get_now(); + + EM_ASM({ + var meta, blob; + function maybeReady() { + if (!(meta && blob)) return; + + meta = JSON.parse(meta); + + Module.print('loading into filesystem'); + FS.mkdir('/files'); + FS.mount(LZ4FS, { + packages: [{ metadata: meta, blob: blob }] + }, '/files'); + + Module.ccall('finish'); + } + + var meta_xhr = new XMLHttpRequest(); + meta_xhr.open("GET", "files.js.metadata", true); + meta_xhr.responseType = "text"; + meta_xhr.onload = function() { + Module.print('got metadata'); + meta = meta_xhr.response; + maybeReady(); + }; + meta_xhr.send(); + + var data_xhr = new XMLHttpRequest(); + data_xhr.open("GET", "files.data", true); + data_xhr.responseType = "blob"; + data_xhr.onload = function() { + Module.print('got data'); + blob = data_xhr.response; + maybeReady(); + }; + data_xhr.send(); + }); + + emscripten_exit_with_live_runtime(); + + return 1; +} + diff --git a/tests/test_browser.py b/tests/test_browser.py index 0bf1b0743d671..3467724c8b00e 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1031,6 +1031,13 @@ def test_fs_workerfs_package(self): Popen([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', os.path.join('sub', 'file2.txt'), '--separate-metadata', '--js-output=files.js']).communicate() self.btest(os.path.join('fs', 
'test_workerfs_package.cpp'), '1', args=['--proxy-to-worker']) + def test_fs_lz4fs_package(self): + #open('file1.txt', 'w').write('0123456789' * (1024*1024)) + #shutil.copyfile('file1.txt', 'file2.txt') + #shutil.copyfile('file1.txt', 'file3.txt') + #Popen([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'file2.txt', 'file3.txt', '--separate-metadata', '--js-output=files.js']).communicate() + self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '1', args=[], timeout=60) + def test_idbstore(self): secret = str(time.time()) for stage in [0, 1, 2, 3, 0, 1, 2, 0, 0, 1, 4, 2, 5]: From 226b24b47044ce9a6d44970c65f24a05a7dec18c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sat, 5 Sep 2015 21:50:06 -0700 Subject: [PATCH 07/64] get test passing --- src/library_lz4fs.js | 5 ++++- tests/fs/test_lz4fs.cpp | 12 ++++++------ tests/test_browser.py | 8 ++++---- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/library_lz4fs.js b/src/library_lz4fs.js index ec67396cda4c5..a28e93dadd640 100644 --- a/src/library_lz4fs.js +++ b/src/library_lz4fs.js @@ -39,11 +39,14 @@ mergeInto(LibraryManager.library, { // compress the data in chunks console.log('compressing package'); var data = pack['data']; + assert(data instanceof ArrayBuffer); + data = new Uint8Array(data); var compressedChunks = []; var offset = 0; var total = 0; while (offset < data.length) { var chunk = data.subarray(offset, offset + LZ4FS.CHUNK_SIZE); + //console.log('compress a chunk ' + [offset, total, data.length]); offset += LZ4FS.CHUNK_SIZE; var bound = LZ4FS.LZ4.compressBound(chunk.length); var compressed = new Uint8Array(bound); @@ -164,7 +167,7 @@ mergeInto(LibraryManager.library, { if (chunkIndex !== LZ4FS.lastChunkIndex) { // decompress the chunk console.log('decompressing chunk ' + chunkIndex); - var compressed = compressedData.data.subarray(compressedStart, compressedStart + compressedSize); + var compressed = contents.compressedData.data.subarray(compressedStart, compressedStart + 
compressedSize); var originalSize = LZ4FS.LZ4.uncompress(compressed, LZ4FS.lastChunk); assert(originalSize === LZ4FS.CHUNK_SIZE); LZ4FS.lastChunkIndex = chunkIndex; diff --git a/tests/fs/test_lz4fs.cpp b/tests/fs/test_lz4fs.cpp index c789ec357d5e4..f2cc04cfbe01f 100644 --- a/tests/fs/test_lz4fs.cpp +++ b/tests/fs/test_lz4fs.cpp @@ -24,7 +24,7 @@ void EMSCRIPTEN_KEEPALIVE finish() { FILE *files[] = { f1, f2, f3 }; double before = emscripten_get_now(); int counter = 0; - for (int i = 0; i < 10*1024*1024 - 10; i += 100*1024) { + for (int i = 0; i < 10*1024*128 - 10; i += 100*1024) { i += random() % 10; int which = i % 3; FILE *f = files[which]; @@ -65,16 +65,16 @@ int main() { before_it_all = emscripten_get_now(); EM_ASM({ - var meta, blob; + var meta, data; function maybeReady() { - if (!(meta && blob)) return; + if (!(meta && data)) return; meta = JSON.parse(meta); Module.print('loading into filesystem'); FS.mkdir('/files'); FS.mount(LZ4FS, { - packages: [{ metadata: meta, blob: blob }] + packages: [{ metadata: meta, data: data }] }, '/files'); Module.ccall('finish'); @@ -92,10 +92,10 @@ int main() { var data_xhr = new XMLHttpRequest(); data_xhr.open("GET", "files.data", true); - data_xhr.responseType = "blob"; + data_xhr.responseType = "arraybuffer"; data_xhr.onload = function() { Module.print('got data'); - blob = data_xhr.response; + data = data_xhr.response; maybeReady(); }; data_xhr.send(); diff --git a/tests/test_browser.py b/tests/test_browser.py index 3467724c8b00e..ca9538640ed97 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1032,10 +1032,10 @@ def test_fs_workerfs_package(self): self.btest(os.path.join('fs', 'test_workerfs_package.cpp'), '1', args=['--proxy-to-worker']) def test_fs_lz4fs_package(self): - #open('file1.txt', 'w').write('0123456789' * (1024*1024)) - #shutil.copyfile('file1.txt', 'file2.txt') - #shutil.copyfile('file1.txt', 'file3.txt') - #Popen([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'file2.txt', 
'file3.txt', '--separate-metadata', '--js-output=files.js']).communicate() + open('file1.txt', 'w').write('0123456789' * (1024*128)) + shutil.copyfile('file1.txt', 'file2.txt') + shutil.copyfile('file1.txt', 'file3.txt') + Popen([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'file2.txt', 'file3.txt', '--separate-metadata', '--js-output=files.js']).communicate() self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '1', args=[], timeout=60) def test_idbstore(self): From d83b7f08bde7332eb7459f3741f552d135c5e298 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sat, 5 Sep 2015 22:04:51 -0700 Subject: [PATCH 08/64] use an LZ4FS chunk size that is tuned to the musl prefetch size --- src/library_lz4fs.js | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/library_lz4fs.js b/src/library_lz4fs.js index a28e93dadd640..74e98fb5123ab 100644 --- a/src/library_lz4fs.js +++ b/src/library_lz4fs.js @@ -4,7 +4,7 @@ mergeInto(LibraryManager.library, { $LZ4FS: { DIR_MODE: {{{ cDefine('S_IFDIR') }}} | 511 /* 0777 */, FILE_MODE: {{{ cDefine('S_IFREG') }}} | 511 /* 0777 */, - CHUNK_SIZE: 1024, + CHUNK_SIZE: 2048, // musl libc does readaheads of 1024 bytes, so a multiple of that is a good idea LZ4: null, lastChunk: null, lastChunkIndex: -1, @@ -37,10 +37,10 @@ mergeInto(LibraryManager.library, { } mount.opts["packages"].forEach(function(pack) { // compress the data in chunks - console.log('compressing package'); var data = pack['data']; assert(data instanceof ArrayBuffer); data = new Uint8Array(data); + console.log('compressing package of size ' + data.length); var compressedChunks = []; var offset = 0; var total = 0; @@ -80,6 +80,7 @@ mergeInto(LibraryManager.library, { end: file.end, }); }); + console.log('compressed package into ' + compressedData.data.length); }); return root; }, @@ -155,12 +156,15 @@ mergeInto(LibraryManager.library, { }, stream_ops: { read: function (stream, buffer, offset, length, position) { + console.log('LZ4FS read ' + 
[offset, length, position]); length = Math.min(length, stream.node.size - position); if (length <= 0) return 0; var contents = stream.node.contents; var written = 0; while (written < length) { var start = contents.start + position + written; // start index in uncompressed data + var desired = length - written; + console.log('current read: ' + ['start', start, 'desired', desired]); var chunkIndex = Math.floor(start / LZ4FS.CHUNK_SIZE); var compressedStart = contents.compressedData.offsets[chunkIndex]; var compressedSize = contents.compressedData.sizes[chunkIndex]; @@ -173,7 +177,6 @@ mergeInto(LibraryManager.library, { LZ4FS.lastChunkIndex = chunkIndex; } var startInChunk = start % LZ4FS.CHUNK_SIZE; - var desired = length - written; var endInChunk = Math.min(startInChunk + desired, LZ4FS.CHUNK_SIZE); buffer.set(LZ4FS.lastChunk.subarray(startInChunk, endInChunk), offset + written); var currWritten = endInChunk - startInChunk; From 8cc3a4db8560dc48ad87a0bce15ddfa219513c5b Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi Date: Sun, 6 Sep 2015 18:13:39 +0300 Subject: [PATCH 09/64] Add missed type to FS.mount docs --- site/source/docs/api_reference/Filesystem-API.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/source/docs/api_reference/Filesystem-API.rst b/site/source/docs/api_reference/Filesystem-API.rst index 8c22fcea6cfd1..f1aacf2983098 100644 --- a/site/source/docs/api_reference/Filesystem-API.rst +++ b/site/source/docs/api_reference/Filesystem-API.rst @@ -131,7 +131,7 @@ File system API Mounts the FS object specified by ``type`` to the directory specified by ``mountpoint``. The ``opts`` object is specific to each file system type. - :param type: The :ref:`file system type `: ``MEMFS``, ``NODEFS``, or ``IDBFS``. + :param type: The :ref:`file system type `: ``MEMFS``, ``NODEFS``, ``IDBFS`` or ``WORKERFS``. :param object opts: A generic settings object used by the underlying file system. 
``NODFES`` uses the `root` parameter to map the Emscripten directory to the physical directory. For example, to mount the current folder as a NODEFS instance: From 7c695147ae3958b64468a68ebb17559c079f0e72 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 09:19:59 -0700 Subject: [PATCH 10/64] cache a chunk in each compressed package data separately in LZ4FS --- src/library_lz4fs.js | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/library_lz4fs.js b/src/library_lz4fs.js index 74e98fb5123ab..120c821e1365b 100644 --- a/src/library_lz4fs.js +++ b/src/library_lz4fs.js @@ -6,15 +6,12 @@ mergeInto(LibraryManager.library, { FILE_MODE: {{{ cDefine('S_IFREG') }}} | 511 /* 0777 */, CHUNK_SIZE: 2048, // musl libc does readaheads of 1024 bytes, so a multiple of that is a good idea LZ4: null, - lastChunk: null, - lastChunkIndex: -1, mount: function (mount) { if (!LZ4FS.LZ4) { LZ4FS.LZ4 = (function() { {{{ read('mini-lz4.js') }}}; return MiniLZ4; })(); - LZ4FS.lastChunk = new Uint8Array(LZ4FS.CHUNK_SIZE); } var root = LZ4FS.createNode(null, '/', LZ4FS.DIR_MODE, 0); var createdParents = {}; @@ -58,10 +55,15 @@ mergeInto(LibraryManager.library, { } data = null; // XXX null out pack['data'] too? 
var compressedData = { - data: new Uint8Array(total), + data: new Uint8Array(total + LZ4FS.CHUNK_SIZE), // store all the compressed data, plus room for one cached decompressed chunk, in one fast array + cachedOffset: total, + cachedChunk: null, + cachedIndex: -1, offsets: [], // chunk# => start in compressed data sizes: [], }; + compressedData.cachedChunk = compressedData.data.subarray(compressedData.cachedOffset); + assert(compressedData.cachedChunk.length === LZ4FS.CHUNK_SIZE); offset = 0; for (var i = 0; i < compressedChunks.length; i++) { compressedData.data.set(compressedChunks[i], offset); @@ -160,25 +162,26 @@ mergeInto(LibraryManager.library, { length = Math.min(length, stream.node.size - position); if (length <= 0) return 0; var contents = stream.node.contents; + var compressedData = contents.compressedData; var written = 0; while (written < length) { var start = contents.start + position + written; // start index in uncompressed data var desired = length - written; console.log('current read: ' + ['start', start, 'desired', desired]); var chunkIndex = Math.floor(start / LZ4FS.CHUNK_SIZE); - var compressedStart = contents.compressedData.offsets[chunkIndex]; - var compressedSize = contents.compressedData.sizes[chunkIndex]; - if (chunkIndex !== LZ4FS.lastChunkIndex) { + var compressedStart = compressedData.offsets[chunkIndex]; + var compressedSize = compressedData.sizes[chunkIndex]; + if (chunkIndex !== compressedData.cachedIndex) { // decompress the chunk console.log('decompressing chunk ' + chunkIndex); - var compressed = contents.compressedData.data.subarray(compressedStart, compressedStart + compressedSize); - var originalSize = LZ4FS.LZ4.uncompress(compressed, LZ4FS.lastChunk); + var compressed = compressedData.data.subarray(compressedStart, compressedStart + compressedSize); + var originalSize = LZ4FS.LZ4.uncompress(compressed, compressedData.cachedChunk); assert(originalSize === LZ4FS.CHUNK_SIZE); - LZ4FS.lastChunkIndex = chunkIndex; + 
compressedData.cachedIndex = chunkIndex; } var startInChunk = start % LZ4FS.CHUNK_SIZE; var endInChunk = Math.min(startInChunk + desired, LZ4FS.CHUNK_SIZE); - buffer.set(LZ4FS.lastChunk.subarray(startInChunk, endInChunk), offset + written); + buffer.set(compressedData.cachedChunk.subarray(startInChunk, endInChunk), offset + written); var currWritten = endInChunk - startInChunk; written += currWritten; } From a07471e10f2a9d6b96c7d180388b9464dd2b9a2d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 10:19:44 -0700 Subject: [PATCH 11/64] improve LZ4FS test --- src/library_lz4fs.js | 8 ++++---- tests/fs/test_lz4fs.cpp | 13 ++++++------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/library_lz4fs.js b/src/library_lz4fs.js index 120c821e1365b..b98b0436a324e 100644 --- a/src/library_lz4fs.js +++ b/src/library_lz4fs.js @@ -71,6 +71,7 @@ mergeInto(LibraryManager.library, { compressedData.sizes[i] = compressedChunks[i].length offset += compressedChunks[i].length; } + console.log('compressed package into ' + compressedData.data.length); assert(offset === total); compressedChunks.length = 0; console.log('mounting package'); @@ -82,7 +83,6 @@ mergeInto(LibraryManager.library, { end: file.end, }); }); - console.log('compressed package into ' + compressedData.data.length); }); return root; }, @@ -158,7 +158,7 @@ mergeInto(LibraryManager.library, { }, stream_ops: { read: function (stream, buffer, offset, length, position) { - console.log('LZ4FS read ' + [offset, length, position]); + //console.log('LZ4FS read ' + [offset, length, position]); length = Math.min(length, stream.node.size - position); if (length <= 0) return 0; var contents = stream.node.contents; @@ -167,13 +167,13 @@ mergeInto(LibraryManager.library, { while (written < length) { var start = contents.start + position + written; // start index in uncompressed data var desired = length - written; - console.log('current read: ' + ['start', start, 'desired', desired]); + 
//console.log('current read: ' + ['start', start, 'desired', desired]); var chunkIndex = Math.floor(start / LZ4FS.CHUNK_SIZE); var compressedStart = compressedData.offsets[chunkIndex]; var compressedSize = compressedData.sizes[chunkIndex]; if (chunkIndex !== compressedData.cachedIndex) { // decompress the chunk - console.log('decompressing chunk ' + chunkIndex); + //console.log('decompressing chunk ' + chunkIndex); var compressed = compressedData.data.subarray(compressedStart, compressedStart + compressedSize); var originalSize = LZ4FS.LZ4.uncompress(compressed, compressedData.cachedChunk); assert(originalSize === LZ4FS.CHUNK_SIZE); diff --git a/tests/fs/test_lz4fs.cpp b/tests/fs/test_lz4fs.cpp index f2cc04cfbe01f..893a0117e0715 100644 --- a/tests/fs/test_lz4fs.cpp +++ b/tests/fs/test_lz4fs.cpp @@ -24,11 +24,12 @@ void EMSCRIPTEN_KEEPALIVE finish() { FILE *files[] = { f1, f2, f3 }; double before = emscripten_get_now(); int counter = 0; - for (int i = 0; i < 10*1024*128 - 10; i += 100*1024) { - i += random() % 10; + int i = 0; + printf("read from files\n"); + for (int i = 0; i < 10*1024*128 - 10; i += 100) { int which = i % 3; FILE *f = files[which]; - printf("%d read %d: %d (%d)\n", counter, which, i, i % 10); + //printf("%d read %d: %d (%d)\n", counter, which, i, i % 10); int off = i % 10; int ret = fseek(f, i, SEEK_SET); assert(ret == 0); @@ -44,17 +45,15 @@ void EMSCRIPTEN_KEEPALIVE finish() { abort(); } counter++; + i += random() % 1024; } double after = emscripten_get_now(); fclose(f1); fclose(f2); fclose(f3); - printf("read IO time: %f\n", after - before); - - printf("total time: %f\n", after - before_it_all); + printf("success. 
read IO time: %f (%d reads), total time: %f\n", after - before, counter, after - before_it_all); // all done - printf("success\n"); int result = 1; REPORT_RESULT(); } From 496d61b823a6faa0cac7bec9111679fa623cf860 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 10:22:13 -0700 Subject: [PATCH 12/64] further LZ4FS test tweaks --- tests/fs/test_lz4fs.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/fs/test_lz4fs.cpp b/tests/fs/test_lz4fs.cpp index 893a0117e0715..b4c10608b3d9f 100644 --- a/tests/fs/test_lz4fs.cpp +++ b/tests/fs/test_lz4fs.cpp @@ -26,7 +26,7 @@ void EMSCRIPTEN_KEEPALIVE finish() { int counter = 0; int i = 0; printf("read from files\n"); - for (int i = 0; i < 10*1024*128 - 10; i += 100) { + for (int i = 0; i < 10*1024*128 - 5; i += random() % 1000) { int which = i % 3; FILE *f = files[which]; //printf("%d read %d: %d (%d)\n", counter, which, i, i % 10); @@ -45,7 +45,6 @@ void EMSCRIPTEN_KEEPALIVE finish() { abort(); } counter++; - i += random() % 1024; } double after = emscripten_get_now(); fclose(f1); From 7fec585956cdf67519f454ae694f50d68c03b188 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 11:16:10 -0700 Subject: [PATCH 13/64] test files with different content in LZ4FS --- src/library_lz4fs.js | 2 ++ tests/fs/test_lz4fs.cpp | 2 +- tests/test_browser.py | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/library_lz4fs.js b/src/library_lz4fs.js index b98b0436a324e..f4d94a2d1ec6f 100644 --- a/src/library_lz4fs.js +++ b/src/library_lz4fs.js @@ -175,7 +175,9 @@ mergeInto(LibraryManager.library, { // decompress the chunk //console.log('decompressing chunk ' + chunkIndex); var compressed = compressedData.data.subarray(compressedStart, compressedStart + compressedSize); + //var t = Date.now(); var originalSize = LZ4FS.LZ4.uncompress(compressed, compressedData.cachedChunk); + //console.log('decompress time: ' + (Date.now() - t)); assert(originalSize === LZ4FS.CHUNK_SIZE); 
compressedData.cachedIndex = chunkIndex; } diff --git a/tests/fs/test_lz4fs.cpp b/tests/fs/test_lz4fs.cpp index b4c10608b3d9f..a79b92ce15dec 100644 --- a/tests/fs/test_lz4fs.cpp +++ b/tests/fs/test_lz4fs.cpp @@ -40,7 +40,7 @@ void EMSCRIPTEN_KEEPALIVE finish() { } buffer[5] = 0; char correct[] = "01234567890123456789"; - if (strncmp(buffer, correct + off, 5) != 0) { + if (strncmp(buffer, correct + which + off, 5) != 0) { printf("%d read %d: %d (%d) failed data\n", counter, which, i, i % 10); abort(); } diff --git a/tests/test_browser.py b/tests/test_browser.py index ca9538640ed97..64d7e2659bca3 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1033,8 +1033,8 @@ def test_fs_workerfs_package(self): def test_fs_lz4fs_package(self): open('file1.txt', 'w').write('0123456789' * (1024*128)) - shutil.copyfile('file1.txt', 'file2.txt') - shutil.copyfile('file1.txt', 'file3.txt') + open('file2.txt', 'w').write('1234567890' * (1024*128)) + open('file3.txt', 'w').write('2345678901' * (1024*128)) Popen([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'file2.txt', 'file3.txt', '--separate-metadata', '--js-output=files.js']).communicate() self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '1', args=[], timeout=60) From c97f27830409c772834fbbaf4ad72f812a6c776c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 15:02:27 -0700 Subject: [PATCH 14/64] use metadata in file packager output more consistently --- tools/file_packager.py | 53 ++++++++++++++---------------------------- 1 file changed, 18 insertions(+), 35 deletions(-) diff --git a/tools/file_packager.py b/tools/file_packager.py index eb7c4c1af277e..f60b901c12357 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -55,6 +55,7 @@ from shared import Compression, execute, suffix, unsuffixed from subprocess import Popen, PIPE, STDOUT import fnmatch +import json if len(sys.argv) == 1: print '''Usage: file_packager.py TARGET [--preload A...] [--embed B...] [--exclude C...] 
[--compress COMPRESSION_DATA] [--no-closure] [--crunch[=X]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--no-heap-copy] [--separate-metadata] @@ -300,8 +301,7 @@ def was_seen(name): for plugin in plugins: plugin(file_) -if separate_metadata: - metadata = {'files': []} +metadata = {'files': []} # Crunch files if crunch: @@ -445,7 +445,7 @@ def was_seen(name): } else { ''', '' if not crunch else ''' } -''', '' if not separate_metadata else ''' +''', ''' var files = metadata.files; for (i = 0; i < files.length; ++i) { new DataRequest(files[i].start, files[i].end, files[i].crunched, files[i].audio).open('GET', files[i].filename); @@ -475,23 +475,13 @@ def was_seen(name): # Preload varname = 'filePreload%d' % counter counter += 1 - if separate_metadata: - metadata['files'].append({ - 'filename': escape_for_js_string(file_['dstpath']), - 'start': file_['data_start'], - 'end': file_['data_end'], - 'crunched': '1' if crunch and filename.endswith(CRUNCH_INPUT_SUFFIX) else '0', - 'audio': '1' if filename[-4:] in AUDIO_SUFFIXES else '0', - }) - else: - code += ''' new DataRequest(%(start)d, %(end)d, %(crunched)s, %(audio)s).open('GET', '%(filename)s'); -''' % { - 'filename': escape_for_js_string(file_['dstpath']), - 'start': file_['data_start'], - 'end': file_['data_end'], - 'crunched': '1' if crunch and filename.endswith(CRUNCH_INPUT_SUFFIX) else '0', - 'audio': '1' if filename[-4:] in AUDIO_SUFFIXES else '0', - } + metadata['files'].append({ + 'filename': file_['dstpath'], + 'start': file_['data_start'], + 'end': file_['data_end'], + 'crunched': '1' if crunch and filename.endswith(CRUNCH_INPUT_SUFFIX) else '0', + 'audio': '1' if filename[-4:] in AUDIO_SUFFIXES else '0', + }) else: assert 0 @@ -548,18 +538,12 @@ def was_seen(name): Module['locateFile'](REMOTE_PACKAGE_BASE) : ((Module['filePackagePrefixURL'] || '') + REMOTE_PACKAGE_BASE); ''' % (data_target, remote_package_name) - if separate_metadata: - metadata['remote_package_size'] = remote_package_size - 
metadata['package_uuid'] = str(package_uuid) - ret += ''' - var REMOTE_PACKAGE_SIZE = metadata.remote_package_size; - var PACKAGE_UUID = metadata.package_uuid; - ''' - else: - ret += ''' - var REMOTE_PACKAGE_SIZE = %d; - var PACKAGE_UUID = '%s'; - ''' % (remote_package_size, package_uuid) + metadata['remote_package_size'] = remote_package_size + metadata['package_uuid'] = str(package_uuid) + ret += ''' + var REMOTE_PACKAGE_SIZE = metadata.remote_package_size; + var PACKAGE_UUID = metadata.package_uuid; + ''' if use_preload_cache: code += r''' @@ -821,8 +805,8 @@ def was_seen(name): }); ''' % {'metadata_file': os.path.basename(jsoutput + '.metadata')} if separate_metadata else ''' } - loadPackage(); -''') + loadPackage(%s); +''' % json.dumps(metadata)) if force or len(data_files) > 0: if jsoutput == None: @@ -841,7 +825,6 @@ def was_seen(name): f.write(ret) f.close() if separate_metadata: - import json f = open(jsoutput + '.metadata', 'w') json.dump(metadata, f, separators=(',', ':')) f.close() From 26f51c095939c0df06d0874dd3162393651dcaee Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 15:28:35 -0700 Subject: [PATCH 15/64] update test --- tests/test_other.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_other.py b/tests/test_other.py index 4300d732f32ac..eb310bba3a0ba 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -2087,7 +2087,7 @@ def test_file_packager(self): assert len(out2) > 0 assert 'below the current directory' not in err2 def clean(txt): - return filter(lambda line: 'PACKAGE_UUID' not in line, txt.split('\n')) + return filter(lambda line: 'PACKAGE_UUID' not in line and 'loadPackage({' not in line, txt.split('\n')) out = clean(out) out2 = clean(out2) assert out == out2 From 7dc1e3991bb4a732345babd5f352f6a5c5ee7afa Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 15:29:34 -0700 Subject: [PATCH 16/64] use metadata to finish loading preloaded files --- tools/file_packager.py | 9 
++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/file_packager.py b/tools/file_packager.py index f60b901c12357..8f3d0eabbc6a5 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -500,9 +500,12 @@ def was_seen(name): // Reuse the bytearray from the XHR as the source for file reads. DataRequest.prototype.byteArray = byteArray; ''' - for file_ in data_files: - if file_['mode'] == 'preload': - use_data += ' DataRequest.prototype.requests["%s"].onload();\n' % (escape_for_js_string(file_['dstpath'])) + use_data += ''' + var files = metadata.files; + for (i = 0; i < files.length; ++i) { + DataRequest.prototype.requests[files[i].filename].onload(); + } + ''' use_data += " Module['removeRunDependency']('datafile_%s');\n" % data_target if Compression.on: From 26af4736b6bb837170df704a50eb8237e81bfbba Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 17:06:32 -0700 Subject: [PATCH 17/64] handle uncompressible data in lz4 --- src/library_lz4fs.js | 46 ++++++++++++++++++++++++++++------------- tests/fs/test_lz4fs.cpp | 21 ++++++++++++++----- tests/test_browser.py | 5 ++++- 3 files changed, 52 insertions(+), 20 deletions(-) diff --git a/src/library_lz4fs.js b/src/library_lz4fs.js index f4d94a2d1ec6f..f1c4410ea3b70 100644 --- a/src/library_lz4fs.js +++ b/src/library_lz4fs.js @@ -39,6 +39,7 @@ mergeInto(LibraryManager.library, { data = new Uint8Array(data); console.log('compressing package of size ' + data.length); var compressedChunks = []; + var successes = []; var offset = 0; var total = 0; while (offset < data.length) { @@ -48,10 +49,19 @@ mergeInto(LibraryManager.library, { var bound = LZ4FS.LZ4.compressBound(chunk.length); var compressed = new Uint8Array(bound); var compressedSize = LZ4FS.LZ4.compress(chunk, compressed); - assert(compressedSize > 0 && compressedSize <= bound); - compressed = compressed.subarray(0, compressedSize); - compressedChunks.push(compressed); - total += compressedSize; + if (compressedSize > 0) 
{ + assert(compressedSize <= bound); + compressed = compressed.subarray(0, compressedSize); + compressedChunks.push(compressed); + total += compressedSize; + successes.push(1); + } else { + assert(compressedSize === 0); + // failure to compress :( + compressedChunks.push(chunk); + total += LZ4FS.CHUNK_SIZE; + successes.push(0); + } } data = null; // XXX null out pack['data'] too? var compressedData = { @@ -61,6 +71,7 @@ mergeInto(LibraryManager.library, { cachedIndex: -1, offsets: [], // chunk# => start in compressed data sizes: [], + successes: successes, // 1 if chunk is compressed }; compressedData.cachedChunk = compressedData.data.subarray(compressedData.cachedOffset); assert(compressedData.cachedChunk.length === LZ4FS.CHUNK_SIZE); @@ -171,19 +182,26 @@ mergeInto(LibraryManager.library, { var chunkIndex = Math.floor(start / LZ4FS.CHUNK_SIZE); var compressedStart = compressedData.offsets[chunkIndex]; var compressedSize = compressedData.sizes[chunkIndex]; - if (chunkIndex !== compressedData.cachedIndex) { - // decompress the chunk - //console.log('decompressing chunk ' + chunkIndex); - var compressed = compressedData.data.subarray(compressedStart, compressedStart + compressedSize); - //var t = Date.now(); - var originalSize = LZ4FS.LZ4.uncompress(compressed, compressedData.cachedChunk); - //console.log('decompress time: ' + (Date.now() - t)); - assert(originalSize === LZ4FS.CHUNK_SIZE); - compressedData.cachedIndex = chunkIndex; + var currChunk; + if (compressedData.successes[chunkIndex]) { + if (chunkIndex !== compressedData.cachedIndex) { + // decompress the chunk + //console.log('decompressing chunk ' + chunkIndex); + var compressed = compressedData.data.subarray(compressedStart, compressedStart + compressedSize); + //var t = Date.now(); + var originalSize = LZ4FS.LZ4.uncompress(compressed, compressedData.cachedChunk); + //console.log('decompress time: ' + (Date.now() - t)); + assert(originalSize === LZ4FS.CHUNK_SIZE); + compressedData.cachedIndex = 
chunkIndex; + } + currChunk = compressedData.cachedChunk; + } else { + // uncompressed + currChunk = compressedData.data.subarray(compressedStart, compressedStart + LZ4FS.CHUNK_SIZE); } var startInChunk = start % LZ4FS.CHUNK_SIZE; var endInChunk = Math.min(startInChunk + desired, LZ4FS.CHUNK_SIZE); - buffer.set(compressedData.cachedChunk.subarray(startInChunk, endInChunk), offset + written); + buffer.set(currChunk.subarray(startInChunk, endInChunk), offset + written); var currWritten = endInChunk - startInChunk; written += currWritten; } diff --git a/tests/fs/test_lz4fs.cpp b/tests/fs/test_lz4fs.cpp index a79b92ce15dec..b17b8170d3fa0 100644 --- a/tests/fs/test_lz4fs.cpp +++ b/tests/fs/test_lz4fs.cpp @@ -38,18 +38,29 @@ void EMSCRIPTEN_KEEPALIVE finish() { printf("%d read %d: %d failed num\n", counter, which, i); abort(); } - buffer[5] = 0; - char correct[] = "01234567890123456789"; - if (strncmp(buffer, correct + which + off, 5) != 0) { - printf("%d read %d: %d (%d) failed data\n", counter, which, i, i % 10); - abort(); + if (which != 2) { + buffer[5] = 0; + char correct[] = "01234567890123456789"; + if (strncmp(buffer, correct + which + off, 5) != 0) { + printf("%d read %d: %d (%d) failed data\n", counter, which, i, i % 10); + abort(); + } } counter++; } double after = emscripten_get_now(); + + printf("final test on random data\n"); + int ret = fseek(f3, 17, SEEK_SET); + assert(ret == 0); + num = fread(buffer, 1, 1, f3); + assert(num == 1); + assert(buffer[0] == 'X'); + fclose(f1); fclose(f2); fclose(f3); + printf("success. 
read IO time: %f (%d reads), total time: %f\n", after - before, counter, after - before_it_all); // all done diff --git a/tests/test_browser.py b/tests/test_browser.py index 64d7e2659bca3..e3c4f4c839a6a 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1032,9 +1032,12 @@ def test_fs_workerfs_package(self): self.btest(os.path.join('fs', 'test_workerfs_package.cpp'), '1', args=['--proxy-to-worker']) def test_fs_lz4fs_package(self): + import random open('file1.txt', 'w').write('0123456789' * (1024*128)) open('file2.txt', 'w').write('1234567890' * (1024*128)) - open('file3.txt', 'w').write('2345678901' * (1024*128)) + random_data = [chr(random.randint(0,255)) for x in range(1024*128*10)] + random_data[17] = 'X' + open('file3.txt', 'w').write(''.join(random_data)) Popen([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'file2.txt', 'file3.txt', '--separate-metadata', '--js-output=files.js']).communicate() self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '1', args=[], timeout=60) From 9337ce399e371b23052f295aee694b7997b39664 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 17:21:18 -0700 Subject: [PATCH 18/64] verify compressed size in lz4 test --- tests/fs/test_lz4fs.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tests/fs/test_lz4fs.cpp b/tests/fs/test_lz4fs.cpp index b17b8170d3fa0..46c344e251d83 100644 --- a/tests/fs/test_lz4fs.cpp +++ b/tests/fs/test_lz4fs.cpp @@ -5,6 +5,8 @@ #include +#define TOTAL_SIZE (10*1024*128) + double before_it_all; extern "C" { @@ -26,7 +28,7 @@ void EMSCRIPTEN_KEEPALIVE finish() { int counter = 0; int i = 0; printf("read from files\n"); - for (int i = 0; i < 10*1024*128 - 5; i += random() % 1000) { + for (int i = 0; i < TOTAL_SIZE - 5; i += random() % 1000) { int which = i % 3; FILE *f = files[which]; //printf("%d read %d: %d (%d)\n", counter, which, i, i % 10); @@ -48,6 +50,7 @@ void EMSCRIPTEN_KEEPALIVE finish() { } counter++; } + assert(counter == 
2657); double after = emscripten_get_now(); printf("final test on random data\n"); @@ -74,6 +77,8 @@ int main() { before_it_all = emscripten_get_now(); EM_ASM({ + var COMPLETE_SIZE = 10*1024*128*3; + var meta, data; function maybeReady() { if (!(meta && data)) return; @@ -82,10 +87,16 @@ int main() { Module.print('loading into filesystem'); FS.mkdir('/files'); - FS.mount(LZ4FS, { + var root = FS.mount(LZ4FS, { packages: [{ metadata: meta, data: data }] }, '/files'); + var compressedSize = root.contents['file1.txt'].contents.compressedData.data.length; + var low = COMPLETE_SIZE/3; + var high = COMPLETE_SIZE/2; + console.log('seeing compressed size of ' + compressedSize + ', expect in ' + [low, high]); + assert(compressedSize > low && compressedSize < high); // more than 1/3, because 1/3 is uncompressible, but still, less than 1/2 + Module.ccall('finish'); } From 0edb0a904ae821d43c63fab831c2133b289a5cdb Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 19:19:36 -0700 Subject: [PATCH 19/64] refactor test_fs_lz4fs_package --- tests/fs/test_lz4fs.cpp | 11 ++++++++++- tests/test_browser.py | 8 ++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/tests/fs/test_lz4fs.cpp b/tests/fs/test_lz4fs.cpp index 46c344e251d83..a2ee3447286f0 100644 --- a/tests/fs/test_lz4fs.cpp +++ b/tests/fs/test_lz4fs.cpp @@ -67,7 +67,12 @@ void EMSCRIPTEN_KEEPALIVE finish() { printf("success. 
read IO time: %f (%d reads), total time: %f\n", after - before, counter, after - before_it_all); // all done - int result = 1; + int result; +#if LOAD_MANUALLY + result = 1; +#else + result = 2; +#endif REPORT_RESULT(); } @@ -76,6 +81,7 @@ void EMSCRIPTEN_KEEPALIVE finish() { int main() { before_it_all = emscripten_get_now(); +#if LOAD_MANUALLY EM_ASM({ var COMPLETE_SIZE = 10*1024*128*3; @@ -122,6 +128,9 @@ int main() { }); emscripten_exit_with_live_runtime(); +#else + finish(); +#endif return 1; } diff --git a/tests/test_browser.py b/tests/test_browser.py index e3c4f4c839a6a..53d12cfb1c9c9 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1032,14 +1032,18 @@ def test_fs_workerfs_package(self): self.btest(os.path.join('fs', 'test_workerfs_package.cpp'), '1', args=['--proxy-to-worker']) def test_fs_lz4fs_package(self): + # generate data import random open('file1.txt', 'w').write('0123456789' * (1024*128)) open('file2.txt', 'w').write('1234567890' * (1024*128)) random_data = [chr(random.randint(0,255)) for x in range(1024*128*10)] random_data[17] = 'X' open('file3.txt', 'w').write(''.join(random_data)) - Popen([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'file2.txt', 'file3.txt', '--separate-metadata', '--js-output=files.js']).communicate() - self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '1', args=[], timeout=60) + + # load the data into LZ4FS manually at runtime. This means we compress on the client. 
This is generally not recommended + print 'manual' + subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'file2.txt', 'file3.txt', '--separate-metadata', '--js-output=files.js']) + self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '1', args=['-DLOAD_MANUALLY'], timeout=60) def test_idbstore(self): secret = str(time.time()) From ba2757b77b9392fa994426e9143e6c6a68636131 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 19:49:17 -0700 Subject: [PATCH 20/64] refactor a compressPackage method in LZ4FS --- src/library_lz4fs.js | 106 ++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 52 deletions(-) diff --git a/src/library_lz4fs.js b/src/library_lz4fs.js index f1c4410ea3b70..74ee93249b89a 100644 --- a/src/library_lz4fs.js +++ b/src/library_lz4fs.js @@ -33,58 +33,7 @@ mergeInto(LibraryManager.library, { return parts[parts.length-1]; } mount.opts["packages"].forEach(function(pack) { - // compress the data in chunks - var data = pack['data']; - assert(data instanceof ArrayBuffer); - data = new Uint8Array(data); - console.log('compressing package of size ' + data.length); - var compressedChunks = []; - var successes = []; - var offset = 0; - var total = 0; - while (offset < data.length) { - var chunk = data.subarray(offset, offset + LZ4FS.CHUNK_SIZE); - //console.log('compress a chunk ' + [offset, total, data.length]); - offset += LZ4FS.CHUNK_SIZE; - var bound = LZ4FS.LZ4.compressBound(chunk.length); - var compressed = new Uint8Array(bound); - var compressedSize = LZ4FS.LZ4.compress(chunk, compressed); - if (compressedSize > 0) { - assert(compressedSize <= bound); - compressed = compressed.subarray(0, compressedSize); - compressedChunks.push(compressed); - total += compressedSize; - successes.push(1); - } else { - assert(compressedSize === 0); - // failure to compress :( - compressedChunks.push(chunk); - total += LZ4FS.CHUNK_SIZE; - successes.push(0); - } - } - data = null; // XXX null out 
pack['data'] too? - var compressedData = { - data: new Uint8Array(total + LZ4FS.CHUNK_SIZE), // store all the compressed data, plus room for one cached decompressed chunk, in one fast array - cachedOffset: total, - cachedChunk: null, - cachedIndex: -1, - offsets: [], // chunk# => start in compressed data - sizes: [], - successes: successes, // 1 if chunk is compressed - }; - compressedData.cachedChunk = compressedData.data.subarray(compressedData.cachedOffset); - assert(compressedData.cachedChunk.length === LZ4FS.CHUNK_SIZE); - offset = 0; - for (var i = 0; i < compressedChunks.length; i++) { - compressedData.data.set(compressedChunks[i], offset); - compressedData.offsets[i] = offset; - compressedData.sizes[i] = compressedChunks[i].length - offset += compressedChunks[i].length; - } - console.log('compressed package into ' + compressedData.data.length); - assert(offset === total); - compressedChunks.length = 0; + var compressedData = LZ4FS.compressPackage(pack['data']); console.log('mounting package'); pack['metadata'].files.forEach(function(file) { var name = file.filename.substr(1); // remove initial slash @@ -97,6 +46,59 @@ mergeInto(LibraryManager.library, { }); return root; }, + compressPackage: function(data) { + // compress the data in chunks + assert(data instanceof ArrayBuffer); + data = new Uint8Array(data); + console.log('compressing package of size ' + data.length); + var compressedChunks = []; + var successes = []; + var offset = 0; + var total = 0; + while (offset < data.length) { + var chunk = data.subarray(offset, offset + LZ4FS.CHUNK_SIZE); + //console.log('compress a chunk ' + [offset, total, data.length]); + offset += LZ4FS.CHUNK_SIZE; + var bound = LZ4FS.LZ4.compressBound(chunk.length); + var compressed = new Uint8Array(bound); + var compressedSize = LZ4FS.LZ4.compress(chunk, compressed); + if (compressedSize > 0) { + assert(compressedSize <= bound); + compressed = compressed.subarray(0, compressedSize); + compressedChunks.push(compressed); + 
total += compressedSize; + successes.push(1); + } else { + assert(compressedSize === 0); + // failure to compress :( + compressedChunks.push(chunk); + total += LZ4FS.CHUNK_SIZE; + successes.push(0); + } + } + data = null; // XXX null out pack['data'] too? + var compressedData = { + data: new Uint8Array(total + LZ4FS.CHUNK_SIZE), // store all the compressed data, plus room for one cached decompressed chunk, in one fast array + cachedOffset: total, + cachedChunk: null, + cachedIndex: -1, + offsets: [], // chunk# => start in compressed data + sizes: [], + successes: successes, // 1 if chunk is compressed + }; + compressedData.cachedChunk = compressedData.data.subarray(compressedData.cachedOffset); + assert(compressedData.cachedChunk.length === LZ4FS.CHUNK_SIZE); + offset = 0; + for (var i = 0; i < compressedChunks.length; i++) { + compressedData.data.set(compressedChunks[i], offset); + compressedData.offsets[i] = offset; + compressedData.sizes[i] = compressedChunks[i].length + offset += compressedChunks[i].length; + } + console.log('compressed package into ' + compressedData.data.length); + assert(offset === total); + return compressedData; + }, createNode: function (parent, name, mode, dev, contents, mtime) { var node = FS.createNode(parent, name, mode); node.mode = mode; From f9342149ce82013e2ddfa0fd256d2895f9b0721b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 19:59:45 -0700 Subject: [PATCH 21/64] prepare to receive precompressed data in LZ4FS --- src/library_lz4fs.js | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/library_lz4fs.js b/src/library_lz4fs.js index 74ee93249b89a..840f7210464d0 100644 --- a/src/library_lz4fs.js +++ b/src/library_lz4fs.js @@ -33,7 +33,12 @@ mergeInto(LibraryManager.library, { return parts[parts.length-1]; } mount.opts["packages"].forEach(function(pack) { - var compressedData = LZ4FS.compressPackage(pack['data']); + var compressedData = pack['compressedData']; + if (!compressedData) 
compressedData = LZ4FS.compressPackage(pack['data']); + compressedData.cachedChunk = compressedData.data.subarray(compressedData.cachedOffset); + assert(compressedData.cachedChunk.length === LZ4FS.CHUNK_SIZE); + compressedData.cachedIndex = -1; + console.log('mounting package'); pack['metadata'].files.forEach(function(file) { var name = file.filename.substr(1); // remove initial slash @@ -86,8 +91,6 @@ mergeInto(LibraryManager.library, { sizes: [], successes: successes, // 1 if chunk is compressed }; - compressedData.cachedChunk = compressedData.data.subarray(compressedData.cachedOffset); - assert(compressedData.cachedChunk.length === LZ4FS.CHUNK_SIZE); offset = 0; for (var i = 0; i < compressedChunks.length; i++) { compressedData.data.set(compressedChunks[i], offset); From b2e49cfb3e3ada076846e4cece6d435daf3a85bd Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 20:53:35 -0700 Subject: [PATCH 22/64] refactor compressPackage into mini-lz4.js --- src/library_lz4fs.js | 56 +++----------------------------------------- src/mini-lz4.js | 54 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 53 deletions(-) diff --git a/src/library_lz4fs.js b/src/library_lz4fs.js index 840f7210464d0..04cf872a3f5e2 100644 --- a/src/library_lz4fs.js +++ b/src/library_lz4fs.js @@ -4,7 +4,7 @@ mergeInto(LibraryManager.library, { $LZ4FS: { DIR_MODE: {{{ cDefine('S_IFDIR') }}} | 511 /* 0777 */, FILE_MODE: {{{ cDefine('S_IFREG') }}} | 511 /* 0777 */, - CHUNK_SIZE: 2048, // musl libc does readaheads of 1024 bytes, so a multiple of that is a good idea + CHUNK_SIZE: -1, LZ4: null, mount: function (mount) { if (!LZ4FS.LZ4) { @@ -12,6 +12,7 @@ mergeInto(LibraryManager.library, { {{{ read('mini-lz4.js') }}}; return MiniLZ4; })(); + LZ4FS.CHUNK_SIZE = LZ4FS.LZ4.CHUNK_SIZE; } var root = LZ4FS.createNode(null, '/', LZ4FS.DIR_MODE, 0); var createdParents = {}; @@ -34,7 +35,7 @@ mergeInto(LibraryManager.library, { } mount.opts["packages"].forEach(function(pack) { 
var compressedData = pack['compressedData']; - if (!compressedData) compressedData = LZ4FS.compressPackage(pack['data']); + if (!compressedData) compressedData = LZ4FS.LZ4.compressPackage(pack['data']); compressedData.cachedChunk = compressedData.data.subarray(compressedData.cachedOffset); assert(compressedData.cachedChunk.length === LZ4FS.CHUNK_SIZE); compressedData.cachedIndex = -1; @@ -51,57 +52,6 @@ mergeInto(LibraryManager.library, { }); return root; }, - compressPackage: function(data) { - // compress the data in chunks - assert(data instanceof ArrayBuffer); - data = new Uint8Array(data); - console.log('compressing package of size ' + data.length); - var compressedChunks = []; - var successes = []; - var offset = 0; - var total = 0; - while (offset < data.length) { - var chunk = data.subarray(offset, offset + LZ4FS.CHUNK_SIZE); - //console.log('compress a chunk ' + [offset, total, data.length]); - offset += LZ4FS.CHUNK_SIZE; - var bound = LZ4FS.LZ4.compressBound(chunk.length); - var compressed = new Uint8Array(bound); - var compressedSize = LZ4FS.LZ4.compress(chunk, compressed); - if (compressedSize > 0) { - assert(compressedSize <= bound); - compressed = compressed.subarray(0, compressedSize); - compressedChunks.push(compressed); - total += compressedSize; - successes.push(1); - } else { - assert(compressedSize === 0); - // failure to compress :( - compressedChunks.push(chunk); - total += LZ4FS.CHUNK_SIZE; - successes.push(0); - } - } - data = null; // XXX null out pack['data'] too? 
- var compressedData = { - data: new Uint8Array(total + LZ4FS.CHUNK_SIZE), // store all the compressed data, plus room for one cached decompressed chunk, in one fast array - cachedOffset: total, - cachedChunk: null, - cachedIndex: -1, - offsets: [], // chunk# => start in compressed data - sizes: [], - successes: successes, // 1 if chunk is compressed - }; - offset = 0; - for (var i = 0; i < compressedChunks.length; i++) { - compressedData.data.set(compressedChunks[i], offset); - compressedData.offsets[i] = offset; - compressedData.sizes[i] = compressedChunks[i].length - offset += compressedChunks[i].length; - } - console.log('compressed package into ' + compressedData.data.length); - assert(offset === total); - return compressedData; - }, createNode: function (parent, name, mode, dev, contents, mtime) { var node = FS.createNode(parent, name, mode); node.mode = mode; diff --git a/src/mini-lz4.js b/src/mini-lz4.js index ca7ec6b3975d3..ecc27955c8046 100644 --- a/src/mini-lz4.js +++ b/src/mini-lz4.js @@ -269,6 +269,60 @@ function compressBlock (src, dst, pos, hashTable, sIdx, eIdx) { return dpos } +exports.CHUNK_SIZE = 2048; // musl libc does readaheads of 1024 bytes, so a multiple of that is a good idea + +exports.compressPackage = function(data) { + // compress the data in chunks + assert(data instanceof ArrayBuffer); + data = new Uint8Array(data); + console.log('compressing package of size ' + data.length); + var compressedChunks = []; + var successes = []; + var offset = 0; + var total = 0; + while (offset < data.length) { + var chunk = data.subarray(offset, offset + exports.CHUNK_SIZE); + //console.log('compress a chunk ' + [offset, total, data.length]); + offset += exports.CHUNK_SIZE; + var bound = exports.compressBound(chunk.length); + var compressed = new Uint8Array(bound); + var compressedSize = exports.compress(chunk, compressed); + if (compressedSize > 0) { + assert(compressedSize <= bound); + compressed = compressed.subarray(0, compressedSize); + 
compressedChunks.push(compressed); + total += compressedSize; + successes.push(1); + } else { + assert(compressedSize === 0); + // failure to compress :( + compressedChunks.push(chunk); + total += exports.CHUNK_SIZE; + successes.push(0); + } + } + data = null; // XXX null out pack['data'] too? + var compressedData = { + data: new Uint8Array(total + exports.CHUNK_SIZE), // store all the compressed data, plus room for one cached decompressed chunk, in one fast array + cachedOffset: total, + cachedChunk: null, + cachedIndex: -1, + offsets: [], // chunk# => start in compressed data + sizes: [], + successes: successes, // 1 if chunk is compressed + }; + offset = 0; + for (var i = 0; i < compressedChunks.length; i++) { + compressedData.data.set(compressedChunks[i], offset); + compressedData.offsets[i] = offset; + compressedData.sizes[i] = compressedChunks[i].length + offset += compressedChunks[i].length; + } + console.log('compressed package into ' + compressedData.data.length); + assert(offset === total); + return compressedData; +}; + return exports; })(); From a0d4fdd91cfecb44ea753ab687055ef150d12aab Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 21:36:40 -0700 Subject: [PATCH 23/64] initial work on lz4 compression in file packager --- tools/file_packager.py | 88 ++++++++++++++++-------- tools/lz4-compress.js | 152 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+), 28 deletions(-) create mode 100644 tools/lz4-compress.js diff --git a/tools/file_packager.py b/tools/file_packager.py index 8f3d0eabbc6a5..9392d3c0ca730 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -11,7 +11,7 @@ Usage: - file_packager.py TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]] [--compress COMPRESSION_DATA] [--crunch[=X]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--no-heap-copy] [--separate-metadata] + file_packager.py TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]] [--compress 
COMPRESSION_DATA] [--crunch[=X]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--no-heap-copy] [--separate-metadata] [--lz4=DIR_NAME] --preload , --embed See emcc --help for more details on those options. @@ -40,6 +40,10 @@ --separate-metadata Stores package metadata separately. Only applicable when preloading and js-output file is specified. + --lz4=DIR_NAME Uses LZ4FS. This compresses the data using LZ4 when this utility is run, then the client decompresses chunks on the fly, avoiding storing + the entire decompressed data in memory at once. DIR_NAME is the name of a directory to mount the files under (LZ4FS cannot be mounted + on /, it has to be a subdirectory) + Notes: * The file packager generates unix-style file paths. So if you are on windows and a file is accessed at @@ -53,6 +57,7 @@ import posixpath import shared from shared import Compression, execute, suffix, unsuffixed +from jsrun import run_js from subprocess import Popen, PIPE, STDOUT import fnmatch import json @@ -95,6 +100,7 @@ # If set to True, the package metadata is stored separately from js-output file which makes js-output file immutable to the package content changes. # If set to False, the package metadata is stored inside the js-output file which makes js-output file to mutate on each invocation of this packager tool. separate_metadata = False +lz4 = None for arg in sys.argv[2:]: if arg == '--preload': @@ -120,6 +126,9 @@ elif arg == '--separate-metadata': separate_metadata = True leading = '' + elif arg.startswith('--lz4='): + lz4 = arg.split('=')[1] + leading = '' elif arg.startswith('--js-output'): jsoutput = arg.split('=')[1] if '=' in arg else None leading = '' @@ -486,35 +495,58 @@ def was_seen(name): assert 0 if has_preloaded: - # Get the big archive and split it up - if no_heap_copy: - use_data = ''' - // copy the entire loaded file into a spot in the heap. Files will refer to slices in that. 
They cannot be freed though - // (we may be allocating before malloc is ready, during startup). - var ptr = Module['getMemory'](byteArray.length); - Module['HEAPU8'].set(byteArray, ptr); - DataRequest.prototype.byteArray = Module['HEAPU8'].subarray(ptr, ptr+byteArray.length); -''' - else: - use_data = ''' - // Reuse the bytearray from the XHR as the source for file reads. - DataRequest.prototype.byteArray = byteArray; -''' - use_data += ''' - var files = metadata.files; - for (i = 0; i < files.length; ++i) { - DataRequest.prototype.requests[files[i].filename].onload(); - } + if not lz4: + # Get the big archive and split it up + if no_heap_copy: + use_data = ''' + // copy the entire loaded file into a spot in the heap. Files will refer to slices in that. They cannot be freed though + // (we may be allocating before malloc is ready, during startup). + var ptr = Module['getMemory'](byteArray.length); + Module['HEAPU8'].set(byteArray, ptr); + DataRequest.prototype.byteArray = Module['HEAPU8'].subarray(ptr, ptr+byteArray.length); ''' - use_data += " Module['removeRunDependency']('datafile_%s');\n" % data_target - - if Compression.on: + else: + use_data = ''' + // Reuse the bytearray from the XHR as the source for file reads. 
+ DataRequest.prototype.byteArray = byteArray; + ''' + use_data += ''' + var files = metadata.files; + for (i = 0; i < files.length; ++i) { + DataRequest.prototype.requests[files[i].filename].onload(); + } + ''' + use_data += " Module['removeRunDependency']('datafile_%s');\n" % data_target + + if Compression.on: + use_data = ''' + Module["decompress"](byteArray, function(decompressed) { + byteArray = new Uint8Array(decompressed); + %s + }); + ''' % use_data + else: + # LZ4FS usage + temp = data_target + '.orig' + shutil.move(data_target, temp) + run_js(shared.path_from_root('tools', 'lz4-compress.js'), shared.NODE_JS, [shared.path_from_root('src', 'mini-lz4.js'), temp, data_target], stdout=PIPE) + os.unlink(temp) use_data = ''' - Module["decompress"](byteArray, function(decompressed) { - byteArray = new Uint8Array(decompressed); - %s - }); - ''' % use_data + var LZ4_DIR = '%s'; + FS.mkdir('/' + LZ4_DIR); + var root = FS.mount(LZ4FS, { + packages: [{ metadata: metadata, compressedData: { + data: + cachedOffset: total, + cachedChunk: null, + cachedIndex: -1, + offsets: + sizes: + successes: + } }] + }, '/' + LZ4_DIR); + Module['removeRunDependency']('datafile_%s'); + ''' % (lz4, data_target) package_uuid = uuid.uuid4(); package_name = Compression.compressed_name(data_target) if Compression.on else data_target diff --git a/tools/lz4-compress.js b/tools/lz4-compress.js new file mode 100644 index 0000000000000..3b1eadd90f7d5 --- /dev/null +++ b/tools/lz4-compress.js @@ -0,0 +1,152 @@ +// *** Environment setup code *** +var arguments_ = []; +var debug = false; + +var ENVIRONMENT_IS_NODE = typeof process === 'object'; +var ENVIRONMENT_IS_WEB = typeof window === 'object'; +var ENVIRONMENT_IS_WORKER = typeof importScripts === 'function'; +var ENVIRONMENT_IS_SHELL = !ENVIRONMENT_IS_WEB && !ENVIRONMENT_IS_NODE && !ENVIRONMENT_IS_WORKER; + +if (ENVIRONMENT_IS_NODE) { + // Expose functionality in the same simple way that the shells work + // Note that we pollute the global 
namespace here, otherwise we break in node + print = function(x) { + process['stdout'].write(x + '\n'); + }; + printErr = function(x) { + process['stderr'].write(x + '\n'); + }; + + var nodeFS = require('fs'); + var nodePath = require('path'); + + if (!nodeFS.existsSync) { + nodeFS.existsSync = function(path) { + try { + return !!nodeFS.readFileSync(path); + } catch(e) { + return false; + } + } + } + + function find(filename) { + var prefixes = [nodePath.join(__dirname, '..', 'src'), process.cwd()]; + for (var i = 0; i < prefixes.length; ++i) { + var combined = nodePath.join(prefixes[i], filename); + if (nodeFS.existsSync(combined)) { + return combined; + } + } + return filename; + } + + read = function(filename, binary) { + filename = nodePath['normalize'](filename); + var ret = nodeFS['readFileSync'](filename); + // The path is absolute if the normalized version is the same as the resolved. + if (!ret && filename != nodePath['resolve'](filename)) { + filename = path.join(__dirname, '..', 'src', filename); + ret = nodeFS['readFileSync'](filename); + } + if (ret && !binary) ret = ret.toString(); + return ret; + }; + + readBinary = function(filename) { return read(filename, true) }; + + load = function(f) { + globalEval(read(f)); + }; + + arguments_ = process['argv'].slice(2); + +} else if (ENVIRONMENT_IS_SHELL) { + // Polyfill over SpiderMonkey/V8 differences + if (!this['read']) { + this['read'] = function(f) { snarf(f) }; + } + + if (typeof scriptArgs != 'undefined') { + arguments_ = scriptArgs; + } else if (typeof arguments != 'undefined') { + arguments_ = arguments; + } + +} else if (ENVIRONMENT_IS_WEB) { + this['print'] = printErr = function(x) { + console.log(x); + }; + + this['read'] = function(url) { + var xhr = new XMLHttpRequest(); + xhr.open('GET', url, false); + xhr.send(null); + return xhr.responseText; + }; + + if (this['arguments']) { + arguments_ = arguments; + } +} else if (ENVIRONMENT_IS_WORKER) { + // We can do very little here... 
+ + this['load'] = importScripts; + +} else { + throw 'Unknown runtime environment. Where are we?'; +} + +function globalEval(x) { + eval.call(null, x); +} + +if (typeof load === 'undefined' && typeof read != 'undefined') { + this['load'] = function(f) { + globalEval(read(f)); + }; +} + +if (typeof printErr === 'undefined') { + this['printErr'] = function(){}; +} + +if (typeof print === 'undefined') { + this['print'] = printErr; +} + +assert = function(x) { + if (!x) throw 'assertion failed ' + new Error().stack; +} + +if (!Math['imul'] || Math['imul'](0xffffffff, 5) !== -5) Math['imul'] = function imul(a, b) { + var ah = a >>> 16; + var al = a & 0xffff; + var bh = b >>> 16; + var bl = b & 0xffff; + return (al*bl + ((ah*bl + al*bh) << 16))|0; +}; + +// *** Environment setup code *** + +var lz4 = arguments_[0]; +var input = arguments_[1]; +var output = arguments_[2]; + +printErr('lz4-compressor called with ' + [lz4, input, output]); + +load(lz4); + +var data = readBinary(input); +if (!(data instanceof ArrayBuffer)) { + printErr('converting to ArrayBuffer'); + data = new Uint8Array(data).buffer; +} +printErr('input size: ' + [data.byteLength]); + +printErr('compressing...'); +var compressedData = MiniLZ4.compressPackage(data); +nodeFS['writeFileSync'](output, Buffer(compressedData.data)); +compressedData.data = null; +print(JSON.stringify(compressedData)); + From 7c24e3471215c8daae2ed1f855e290e3319a150f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 21:52:57 -0700 Subject: [PATCH 24/64] finish lz4 test with both normal and manual loading of files --- tests/test_browser.py | 6 ++++++ tools/file_packager.py | 18 ++++++------------ tools/lz4-compress.js | 6 ++---- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/tests/test_browser.py b/tests/test_browser.py index 53d12cfb1c9c9..5fbd8f152ecdd 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1040,6 +1040,12 @@ def test_fs_lz4fs_package(self): random_data[17] = 'X' 
open('file3.txt', 'w').write(''.join(random_data)) + # compress in the file packager, on the server. the client receives compressed data and can just use it. this is typical usage + print 'normal' + out = subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'file2.txt', 'file3.txt', '--lz4=files']) + open('files.js', 'w').write(out) + self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '2', args=['--pre-js', 'files.js'], timeout=60) + # load the data into LZ4FS manually at runtime. This means we compress on the client. This is generally not recommended print 'manual' subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'file2.txt', 'file3.txt', '--separate-metadata', '--js-output=files.js']) diff --git a/tools/file_packager.py b/tools/file_packager.py index 9392d3c0ca730..99906474b4832 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -459,7 +459,7 @@ def was_seen(name): for (i = 0; i < files.length; ++i) { new DataRequest(files[i].start, files[i].end, files[i].crunched, files[i].audio).open('GET', files[i].filename); } -''') +''' if not lz4 else '') counter = 0 for file_ in data_files: @@ -529,24 +529,18 @@ def was_seen(name): # LZ4FS usage temp = data_target + '.orig' shutil.move(data_target, temp) - run_js(shared.path_from_root('tools', 'lz4-compress.js'), shared.NODE_JS, [shared.path_from_root('src', 'mini-lz4.js'), temp, data_target], stdout=PIPE) + meta = run_js(shared.path_from_root('tools', 'lz4-compress.js'), shared.NODE_JS, [shared.path_from_root('src', 'mini-lz4.js'), temp, data_target], stdout=PIPE) os.unlink(temp) use_data = ''' var LZ4_DIR = '%s'; FS.mkdir('/' + LZ4_DIR); + var compressedData = %s; + compressedData.data = byteArray; var root = FS.mount(LZ4FS, { - packages: [{ metadata: metadata, compressedData: { - data: - cachedOffset: total, - cachedChunk: null, - cachedIndex: -1, - offsets: - sizes: - successes: - } }] + packages: [{ metadata: metadata, 
compressedData: compressedData }] }, '/' + LZ4_DIR); Module['removeRunDependency']('datafile_%s'); - ''' % (lz4, data_target) + ''' % (lz4, meta, data_target) package_uuid = uuid.uuid4(); package_name = Compression.compressed_name(data_target) if Compression.on else data_target diff --git a/tools/lz4-compress.js b/tools/lz4-compress.js index 3b1eadd90f7d5..1732b6352910e 100644 --- a/tools/lz4-compress.js +++ b/tools/lz4-compress.js @@ -127,14 +127,14 @@ if (!Math['imul'] || Math['imul'](0xffffffff, 5) !== -5) Math['imul'] = function return (al*bl + ((ah*bl + al*bh) << 16))|0; }; +console.log = printErr; + // *** Environment setup code *** var lz4 = arguments_[0]; var input = arguments_[1]; var output = arguments_[2]; -printErr('lz4-compressor called with ' + [lz4, input, output]); - load(lz4); var data = readBinary(input); @@ -142,9 +142,7 @@ if (!(data instanceof ArrayBuffer)) { printErr('converting to ArrayBuffer'); data = new Uint8Array(data).buffer; } -printErr('input size: ' + [data.byteLength]); -printErr('compressing...'); var compressedData = MiniLZ4.compressPackage(data); nodeFS['writeFileSync'](output, Buffer(compressedData.data)); compressedData.data = null; From 8426a80225c523830d058852ae6702363de82ded Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Sun, 6 Sep 2015 21:54:32 -0700 Subject: [PATCH 25/64] add assert on processPackageData input type --- tools/file_packager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/file_packager.py b/tools/file_packager.py index 99906474b4832..224bf16e0c3d2 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -716,6 +716,7 @@ def was_seen(name): function processPackageData(arrayBuffer) { Module.finishedDataFileDownloads++; assert(arrayBuffer, 'Loading data file failed.'); + assert(arrayBuffer instanceof ArrayBuffer, 'bad input to processPackageData'); var byteArray = new Uint8Array(arrayBuffer); var curr; %s From 0699db96c25f6279a3c93de18dd58d58dd7ee659 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 7 Sep 2015 16:38:37 +0300 Subject: [PATCH 26/64] Don't ask the user if he wants to close the browser when multiple tabs are open and running Firefox via emrun. --- emrun | 2 ++ 1 file changed, 2 insertions(+) diff --git a/emrun b/emrun index 48dd16eb36ca6..a404dc81d7213 100755 --- a/emrun +++ b/emrun @@ -226,6 +226,8 @@ user_pref('browser.customizemode.tip0.shown', true); user_pref("browser.toolbarbuttons.introduced.pocket-button", true); // Start in private browsing mode to not cache anything to disk (everything will be wiped anyway after this run) user_pref("browser.privatebrowsing.autostart", true); +// Don't ask the user if he wants to close the browser when there are multiple tabs. +user_pref("browser.tabs.warnOnClose", false); ''') f.close() logv('create_emrun_safe_firefox_profile: Created new Firefox profile "' + temp_firefox_profile_dir + '"') From 6bf85e754d2af7dd22430662aa4f9f1dc5c28581 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 7 Sep 2015 20:00:51 +0300 Subject: [PATCH 27/64] Pass -no-remote when running a page with emrun and --safe_firefox_profile in order to not conflict with any existing running Firefox instances. --- emrun | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emrun b/emrun index a404dc81d7213..3ddafe0996ba1 100755 --- a/emrun +++ b/emrun @@ -1175,7 +1175,7 @@ def main(): # cleans up the temporary profile if one exists. 
if processname_killed_atexit == 'firefox' and options.safe_firefox_profile: profile_dir = create_emrun_safe_firefox_profile() - browser += ['-profile', profile_dir.replace('\\', '/')] + browser += ['-no-remote', '-profile', profile_dir.replace('\\', '/')] if options.system_info: logi('Time of run: ' + time.strftime("%x %X")) From 2f20ab88daf6c6afa5b5b10042306b51156c4a4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 7 Sep 2015 20:11:59 +0300 Subject: [PATCH 28/64] Set dom.allow_scripts_to_close_windows = true when running Firefox under emrun with --safe_firefox_profile, so that a test page can gracefully close itself without emrun having to forcibly kill it. --- emrun | 2 ++ 1 file changed, 2 insertions(+) diff --git a/emrun b/emrun index 3ddafe0996ba1..67074ca88904a 100755 --- a/emrun +++ b/emrun @@ -228,6 +228,8 @@ user_pref("browser.toolbarbuttons.introduced.pocket-button", true); user_pref("browser.privatebrowsing.autostart", true); // Don't ask the user if he wants to close the browser when there are multiple tabs. user_pref("browser.tabs.warnOnClose", false); +// Allow the launched script window to close itself, so that we don't need to kill the browser process in order to move on. +user_pref("dom.allow_scripts_to_close_windows", true); ''') f.close() logv('create_emrun_safe_firefox_profile: Created new Firefox profile "' + temp_firefox_profile_dir + '"') From a3dcfdd50c835703cdc658d1a35e3a2452d9b074 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 7 Sep 2015 20:32:02 +0300 Subject: [PATCH 29/64] Fix emrun test script to only call to user_pref() to set prefs (the other directives did not quite work). When building with --emrun, have exit() close the current browser window automatically. Run browser.test_emrun with --safe_firefox_profile. 
--- emrun | 13 ++++++------- src/emrun_postjs.js | 16 +++++++++++++++- tests/test_browser.py | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/emrun b/emrun index 67074ca88904a..4b0dace24465b 100755 --- a/emrun +++ b/emrun @@ -206,16 +206,15 @@ user_pref("app.update.silent", false); user_pref("app.update.mode", 0); user_pref("app.update.service.enabled", false); // Don't check compatibility with add-ons, or (auto)update them -clearPref("extensions.lastAppVersion"); -lockPref("plugins.hide_infobar_for_outdated_plugin", true); -clearPref("plugins.update.url"); +user_pref("extensions.lastAppVersion", ''); +user_pref("plugins.hide_infobar_for_outdated_plugin", true); +user_pref("plugins.update.url", ''); // Disable health reporter -lockPref("datareporting.healthreport.service.enabled", false); +user_pref("datareporting.healthreport.service.enabled", false); // Disable crash reporter -lockPref("toolkit.crashreporter.enabled", false); -Components.classes["@mozilla.org/toolkit/crash-reporter;1"].getService(Components.interfaces.nsICrashReporter).submitReports = false; +user_pref("toolkit.crashreporter.enabled", false); // Don't show WhatsNew on first run after every update -pref("browser.startup.homepage_override.mstone","ignore"); +user_pref("browser.startup.homepage_override.mstone","ignore"); // Don't show 'know your rights' and a bunch of other nag windows at startup user_pref("browser.rights.3.shown", true); user_pref('devtools.devedition.promo.shown', true); diff --git a/src/emrun_postjs.js b/src/emrun_postjs.js index 63da3f8ece0a5..41a1f9920c664 100644 --- a/src/emrun_postjs.js +++ b/src/emrun_postjs.js @@ -5,12 +5,26 @@ if (typeof window === "object" && (typeof ENVIRONMENT_IS_PTHREAD === 'undefined' http.open("POST", "stdio.html", true); http.send(msg); } + function postExit(msg) { + var http = new XMLHttpRequest(); + http.onreadystatechange = function() { + if (http.readyState == 4 /*DONE*/) { + try { + // Try closing the current browser 
window, since it exit()ed itself. This can shut down the browser process + // and emrun does not need to kill the whole browser process. + if (typeof window !== 'undefined' && window.close) window.close(); + } catch(e) {} + } + } + http.open("POST", "stdio.html", true); + http.send(msg); + } // If the address contains localhost, or we are running the page from port 6931, we can assume we're running the test runner and should post stdout logs. if (document.URL.search("localhost") != -1 || document.URL.search(":6931/") != -1) { var emrun_http_sequence_number = 1; var prevPrint = Module['print']; var prevErr = Module['printErr']; - function emrun_exit() { post('^exit^'+EXITSTATUS); }; + function emrun_exit() { postExit('^exit^'+EXITSTATUS); }; Module['addOnExit'](emrun_exit); Module['print'] = function emrun_print(text) { post('^out^'+(emrun_http_sequence_number++)+'^'+encodeURIComponent(text)); prevPrint(text); } Module['printErr'] = function emrun_printErr(text) { post('^err^'+(emrun_http_sequence_number++)+'^'+encodeURIComponent(text)); prevErr(text); } diff --git a/tests/test_browser.py b/tests/test_browser.py index 0bf1b0743d671..30a4ea123a4c1 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1856,7 +1856,7 @@ def test_emrun(self): # and the browser will not close as part of the test, pinning down the cwd on Windows and it wouldn't be possible to delete it. Therefore switch away from that directory # before launching. 
os.chdir(path_from_root()) - args = [PYTHON, path_from_root('emrun'), '--timeout', '30', '--verbose', '--log_stdout', os.path.join(outdir, 'stdout.txt'), '--log_stderr', os.path.join(outdir, 'stderr.txt')] + args = [PYTHON, path_from_root('emrun'), '--timeout', '30', '--safe_firefox_profile', '--verbose', '--log_stdout', os.path.join(outdir, 'stdout.txt'), '--log_stderr', os.path.join(outdir, 'stderr.txt')] if emscripten_browser is not None: args += ['--browser', emscripten_browser] args += [os.path.join(outdir, 'hello_world.html'), '1', '2', '--3'] From 1217bf6ca82aef0ef849b1f807792a2097b1e0ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 7 Sep 2015 20:41:49 +0300 Subject: [PATCH 30/64] Have emrun more diligently delete the temporary Firefox profile it created, if at all possible. --- emrun | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/emrun b/emrun index 4b0dace24465b..4ba2e496a22b6 100755 --- a/emrun +++ b/emrun @@ -1174,7 +1174,7 @@ def main(): # Create temporary Firefox profile to run the page with. This is important to run after kill_browser_process()/kill_on_start op above, since that # cleans up the temporary profile if one exists. 
- if processname_killed_atexit == 'firefox' and options.safe_firefox_profile: + if processname_killed_atexit == 'firefox' and options.safe_firefox_profile and not options.no_browser: profile_dir = create_emrun_safe_firefox_profile() browser += ['-no-remote', '-profile', profile_dir.replace('\\', '/')] @@ -1243,12 +1243,23 @@ def main(): if not options.no_browser: if options.kill_on_exit: kill_browser_process() - elif is_browser_process_alive(): - logv('Not terminating browser process, pass --kill_exit to terminate the browser when it calls exit().') + else: + if is_browser_process_alive(): + logv('Not terminating browser process, pass --kill_exit to terminate the browser when it calls exit().') + # If we have created a temporary Firefox profile, we would really really like to wait until the browser closes, + # or otherwise we'll just have to litter temp files and keep the temporary profile alive. It is possible here + # that the browser is cooperatively shutting down, but has not yet had time to do so, so wait for a short while. + if temp_firefox_profile_dir != None: time.sleep(3) + + if not is_browser_process_alive(): + # Browser is no longer running, make sure to clean up the temp Firefox profile, if we created one. 
+ delete_emrun_safe_firefox_profile() return page_exit_code if __name__ == '__main__': returncode = main() logv('emrun quitting with process exit code ' + str(returncode)) + if temp_firefox_profile_dir != None: + logi('Warning: Had to leave behind a temporary Firefox profile directory ' + temp_firefox_profile_dir + ' because --safe_firefox_profile was set and the browser did not quit before emrun did.') sys.exit(returncode) From f195fcb7214b9c31655795f3caa3125d7d035b06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 7 Sep 2015 20:52:58 +0300 Subject: [PATCH 31/64] Fix a race condition between stdout/stderr prints vs exit() when using --emrun: fully wait until all stdout/stderr messages have been sent before closing the Firefox browser window. --- src/emrun_postjs.js | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/emrun_postjs.js b/src/emrun_postjs.js index 41a1f9920c664..d34ff0b6ec2ec 100644 --- a/src/emrun_postjs.js +++ b/src/emrun_postjs.js @@ -1,10 +1,10 @@ if (typeof window === "object" && (typeof ENVIRONMENT_IS_PTHREAD === 'undefined' || !ENVIRONMENT_IS_PTHREAD)) { function emrun_register_handlers() { - function post(msg) { - var http = new XMLHttpRequest(); - http.open("POST", "stdio.html", true); - http.send(msg); - } + // When C code exit()s, we may still have remaining stdout and stderr messages in flight. In that case, we can't close + // the browser until all those XHRs have finished, so the following state variables track that all communication is done, + // after which we can close. 
+ var emrun_num_post_messages_in_flight = 0; + var emrun_should_close_itself = false; function postExit(msg) { var http = new XMLHttpRequest(); http.onreadystatechange = function() { @@ -19,12 +19,23 @@ if (typeof window === "object" && (typeof ENVIRONMENT_IS_PTHREAD === 'undefined' http.open("POST", "stdio.html", true); http.send(msg); } + function post(msg) { + var http = new XMLHttpRequest(); + ++emrun_num_post_messages_in_flight; + http.onreadystatechange = function() { + if (http.readyState == 4 /*DONE*/) { + if (--emrun_num_post_messages_in_flight == 0 && emrun_should_close_itself) postExit('^exit^'+EXITSTATUS); + } + } + http.open("POST", "stdio.html", true); + http.send(msg); + } // If the address contains localhost, or we are running the page from port 6931, we can assume we're running the test runner and should post stdout logs. if (document.URL.search("localhost") != -1 || document.URL.search(":6931/") != -1) { var emrun_http_sequence_number = 1; var prevPrint = Module['print']; var prevErr = Module['printErr']; - function emrun_exit() { postExit('^exit^'+EXITSTATUS); }; + function emrun_exit() { if (emrun_num_post_messages_in_flight == 0) postExit('^exit^'+EXITSTATUS); else emrun_should_close_itself = true; }; Module['addOnExit'](emrun_exit); Module['print'] = function emrun_print(text) { post('^out^'+(emrun_http_sequence_number++)+'^'+encodeURIComponent(text)); prevPrint(text); } Module['printErr'] = function emrun_printErr(text) { post('^err^'+(emrun_http_sequence_number++)+'^'+encodeURIComponent(text)); prevErr(text); } From 3c5149e0720364aee2c281bbc9e938618673c492 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Sep 2015 10:57:29 -0700 Subject: [PATCH 32/64] emit asm consts on separate lines --- emscripten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emscripten.py b/emscripten.py index 7920aca2aa925..a70d500ac9c08 100755 --- a/emscripten.py +++ b/emscripten.py @@ -289,7 +289,7 @@ def save_settings(): return 
ASM_CONSTS[code](%s) | 0; }''' % (arity, ', '.join(all_args), ', '.join(args))) - pre = pre.replace('// === Body ===', '// === Body ===\n' + '\nvar ASM_CONSTS = [' + ', '.join(asm_consts) + '];\n' + '\n'.join(asm_const_funcs) + '\n') + pre = pre.replace('// === Body ===', '// === Body ===\n' + '\nvar ASM_CONSTS = [' + ',\n '.join(asm_consts) + '];\n' + '\n'.join(asm_const_funcs) + '\n') #if DEBUG: outfile.write('// pre\n') outfile.write(pre) From 15507088aae196729238df9a9bccdca3e6720835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 7 Sep 2015 21:08:08 +0300 Subject: [PATCH 33/64] Tune emrun to avoid various Firefox network update checks with the clean profile. --- emrun | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/emrun b/emrun index 4ba2e496a22b6..6cb3db8addd3b 100755 --- a/emrun +++ b/emrun @@ -229,6 +229,20 @@ user_pref("browser.privatebrowsing.autostart", true); user_pref("browser.tabs.warnOnClose", false); // Allow the launched script window to close itself, so that we don't need to kill the browser process in order to move on. user_pref("dom.allow_scripts_to_close_windows", true); +// Set various update timers to a large value in the future in order to not +// trigger a large mass of update HTTP traffic on each Firefox run on the clean profile. +// "01/01/2100" is 4102437600 as seconds since Unix epoch. 
+user_pref("app.update.lastUpdateTime.addon-background-update-timer", 4102437600); +user_pref("app.update.lastUpdateTime.background-update-timer", 4102437600); +user_pref("app.update.lastUpdateTime.blocklist-background-update-timer", 4102437600); +user_pref("app.update.lastUpdateTime.browser-cleanup-thumbnails", 4102437600); +user_pref("app.update.lastUpdateTime.experiments-update-timer", 4102437600); +user_pref("app.update.lastUpdateTime.search-engine-update-timer", 4102437600); +user_pref("app.update.lastUpdateTime.xpi-signature-verification", 4102437600); +user_pref("extensions.getAddons.cache.lastUpdate", 4102437600); +user_pref("media.gmp-eme-adobe.lastUpdate", 4102437600); +user_pref("media.gmp-gmpopenh264.lastUpdate", 4102437600); +user_pref("datareporting.healthreport.nextDataSubmissionTime", 4102437600439); ''') f.close() logv('create_emrun_safe_firefox_profile: Created new Firefox profile "' + temp_firefox_profile_dir + '"') From e87b53bc398a3a1311f7bec625fac356d1437b95 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Sep 2015 11:21:23 -0700 Subject: [PATCH 34/64] cache two chunks in LZ4FS to avoid thrashing at the boundaries of a chunk, and add testing --- src/library_lz4fs.js | 29 ++++++++++++++++------- src/mini-lz4.js | 6 ++--- tests/fs/test_lz4fs.cpp | 52 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 72 insertions(+), 15 deletions(-) diff --git a/src/library_lz4fs.js b/src/library_lz4fs.js index 04cf872a3f5e2..21429d0129810 100644 --- a/src/library_lz4fs.js +++ b/src/library_lz4fs.js @@ -36,10 +36,13 @@ mergeInto(LibraryManager.library, { mount.opts["packages"].forEach(function(pack) { var compressedData = pack['compressedData']; if (!compressedData) compressedData = LZ4FS.LZ4.compressPackage(pack['data']); - compressedData.cachedChunk = compressedData.data.subarray(compressedData.cachedOffset); - assert(compressedData.cachedChunk.length === LZ4FS.CHUNK_SIZE); - compressedData.cachedIndex = -1; - + 
assert(compressedData.cachedIndexes.length === compressedData.cachedChunks.length); + for (var i = 0; i < compressedData.cachedIndexes.length; i++) { + compressedData.cachedIndexes[i] = -1; + compressedData.cachedChunks[i] = compressedData.data.subarray(compressedData.cachedOffset + i*LZ4FS.CHUNK_SIZE, + compressedData.cachedOffset + (i+1)*LZ4FS.CHUNK_SIZE); + assert(compressedData.cachedChunks[i].length === LZ4FS.CHUNK_SIZE); + } console.log('mounting package'); pack['metadata'].files.forEach(function(file) { var name = file.filename.substr(1); // remove initial slash @@ -139,17 +142,25 @@ mergeInto(LibraryManager.library, { var compressedSize = compressedData.sizes[chunkIndex]; var currChunk; if (compressedData.successes[chunkIndex]) { - if (chunkIndex !== compressedData.cachedIndex) { + var found = compressedData.cachedIndexes.indexOf(chunkIndex); + if (found >= 0) { + currChunk = compressedData.cachedChunks[found]; + } else { // decompress the chunk - //console.log('decompressing chunk ' + chunkIndex); + compressedData.cachedIndexes.pop(); + compressedData.cachedIndexes.unshift(chunkIndex); + currChunk = compressedData.cachedChunks.pop(); + compressedData.cachedChunks.unshift(currChunk); + if (compressedData.debug) { + console.log('decompressing chunk ' + chunkIndex); + Module['decompressedChunks'] = (Module['decompressedChunks'] || 0) + 1; + } var compressed = compressedData.data.subarray(compressedStart, compressedStart + compressedSize); //var t = Date.now(); - var originalSize = LZ4FS.LZ4.uncompress(compressed, compressedData.cachedChunk); + var originalSize = LZ4FS.LZ4.uncompress(compressed, currChunk); //console.log('decompress time: ' + (Date.now() - t)); assert(originalSize === LZ4FS.CHUNK_SIZE); - compressedData.cachedIndex = chunkIndex; } - currChunk = compressedData.cachedChunk; } else { // uncompressed currChunk = compressedData.data.subarray(compressedStart, compressedStart + LZ4FS.CHUNK_SIZE); diff --git a/src/mini-lz4.js b/src/mini-lz4.js index 
ecc27955c8046..effc3e693f838 100644 --- a/src/mini-lz4.js +++ b/src/mini-lz4.js @@ -303,10 +303,10 @@ exports.compressPackage = function(data) { } data = null; // XXX null out pack['data'] too? var compressedData = { - data: new Uint8Array(total + exports.CHUNK_SIZE), // store all the compressed data, plus room for one cached decompressed chunk, in one fast array + data: new Uint8Array(total + exports.CHUNK_SIZE*2), // store all the compressed data, plus room for two cached decompressed chunk, in one fast array cachedOffset: total, - cachedChunk: null, - cachedIndex: -1, + cachedIndexes: [-1, -1], // cache last two blocks, so that reading 1,2,3 + preloading another block won't trigger decompress thrashing + cachedChunks: [null, null], offsets: [], // chunk# => start in compressed data sizes: [], successes: successes, // 1 if chunk is compressed diff --git a/tests/fs/test_lz4fs.cpp b/tests/fs/test_lz4fs.cpp index a2ee3447286f0..a689ecd17bae0 100644 --- a/tests/fs/test_lz4fs.cpp +++ b/tests/fs/test_lz4fs.cpp @@ -60,12 +60,57 @@ void EMSCRIPTEN_KEEPALIVE finish() { assert(num == 1); assert(buffer[0] == 'X'); + printf("read success. 
read IO time: %f (%d reads), total time: %f\n", after - before, counter, after - before_it_all); + +#if LOAD_MANUALLY + printf("caching tests\n"); + ret = fseek(f3, TOTAL_SIZE - 5, SEEK_SET); assert(ret == 0); + num = fread(buffer, 1, 1, f3); assert(num == 1); // read near the end + ret = fseek(f3, TOTAL_SIZE - 5000, SEEK_SET); assert(ret == 0); + num = fread(buffer, 1, 1, f3); assert(num == 1); // also near the end + EM_ASM({ + assert(!Module.decompressedChunks); + Module.compressedData.debug = true; + console.log('last cached indexes ' + Module.compressedData.cachedIndexes); + assert(Module.compressedData.cachedIndexes.indexOf(0) < 0); // 0 is not cached + }); + printf("multiple reads of same byte\n"); + for (int i = 0; i < 100; i++) { + ret = fseek(f1, 0, SEEK_SET); // read near the start, should trigger one decompress, then all cache hits + assert(ret == 0); + num = fread(buffer, 1, 1, f1); + assert(num == 1); + } + EM_ASM({ + assert(Module.decompressedChunks == 1, ['seeing', Module.decompressedChunks, 'decompressed chunks']); + }); + printf("multiple reads of adjoining byte\n"); + for (int i = 0; i < 100; i++) { + ret = fseek(f1, i, SEEK_SET); + assert(ret == 0); + num = fread(buffer, 1, 1, f1); + assert(num == 1); + } + EM_ASM({ + assert(Module.decompressedChunks == 1, ['seeing', Module.decompressedChunks, 'decompressed chunks']); + }); + printf("multiple reads across two chunks\n"); + for (int i = 0; i < 2100; i++) { + ret = fseek(f1, i, SEEK_SET); + assert(ret == 0); + num = fread(buffer, 1, 1, f1); + assert(num == 1); + } + EM_ASM({ + assert(Module.decompressedChunks == 2, ['seeing', Module.decompressedChunks, 'decompressed chunks']); + }); + printf("caching test ok\n"); +#endif + fclose(f1); fclose(f2); fclose(f3); - printf("success. 
read IO time: %f (%d reads), total time: %f\n", after - before, counter, after - before_it_all); - // all done int result; #if LOAD_MANUALLY @@ -97,7 +142,8 @@ int main() { packages: [{ metadata: meta, data: data }] }, '/files'); - var compressedSize = root.contents['file1.txt'].contents.compressedData.data.length; + Module.compressedData = root.contents['file1.txt'].contents.compressedData; + var compressedSize = Module.compressedData.data.length; var low = COMPLETE_SIZE/3; var high = COMPLETE_SIZE/2; console.log('seeing compressed size of ' + compressedSize + ', expect in ' + [low, high]); From baa4012a052730f596feca8b055c4deb2b0eba5c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Sep 2015 12:13:45 -0700 Subject: [PATCH 35/64] handle a last uncompressed chunk of non-canonical size in lz4 compression --- src/mini-lz4.js | 4 ++-- tests/test_browser.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mini-lz4.js b/src/mini-lz4.js index effc3e693f838..4d7553f51a9c4 100644 --- a/src/mini-lz4.js +++ b/src/mini-lz4.js @@ -297,7 +297,7 @@ exports.compressPackage = function(data) { assert(compressedSize === 0); // failure to compress :( compressedChunks.push(chunk); - total += exports.CHUNK_SIZE; + total += chunk.length; // last chunk may not be the full exports.CHUNK_SIZE size successes.push(0); } } @@ -318,7 +318,7 @@ exports.compressPackage = function(data) { compressedData.sizes[i] = compressedChunks[i].length offset += compressedChunks[i].length; } - console.log('compressed package into ' + compressedData.data.length); + console.log('compressed package into ' + [compressedData.data.length]); assert(offset === total); return compressedData; }; diff --git a/tests/test_browser.py b/tests/test_browser.py index 5fbd8f152ecdd..805d956d3512f 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1036,7 +1036,7 @@ def test_fs_lz4fs_package(self): import random open('file1.txt', 'w').write('0123456789' * (1024*128)) 
open('file2.txt', 'w').write('1234567890' * (1024*128)) - random_data = [chr(random.randint(0,255)) for x in range(1024*128*10)] + random_data = [chr(random.randint(0,255)) for x in range(1024*128*10 + 1)] random_data[17] = 'X' open('file3.txt', 'w').write(''.join(random_data)) From 909fed02e6f9a216659c2802a998eacdcc6f4ef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 7 Sep 2015 22:46:39 +0300 Subject: [PATCH 36/64] Remove unhelpful CMake version info and vague reference to a bug from getting started documentation. There are currently no known issues with using latest CMake with Emscripten - that should be always preferred. Also, building projects with CMake is able to report which version it needs (via the cmake_minimum_required() directive), so it is not good to redundantly duplicate it in the documentation as there is no danger to get silent breakage from using an old version. Closes #3749. --- site/source/docs/getting_started/downloads.rst | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/site/source/docs/getting_started/downloads.rst b/site/source/docs/getting_started/downloads.rst index 8c2eced8790e3..ee981197ab09f 100644 --- a/site/source/docs/getting_started/downloads.rst +++ b/site/source/docs/getting_started/downloads.rst @@ -117,10 +117,7 @@ These instructions explain how to install **all** the :ref:`required tools `_. - - .. tip:: This specific version (2.8.10) is recommended — it has been tested and shown to work. Other versions may not correctly set up the PATH variables, with the result that running *cmake* gives you "not found" errors. - + - Download and install latest CMake from `Kitware CMake downloads `_. #. Install *node.js* from http://nodejs.org/ @@ -157,8 +154,6 @@ Linux # Install cmake sudo apt-get install cmake -.. note:: You will probably need CMake version 2.8.8 or later. - - *Python*, *node.js* or *Java* are not provided by *emsdk*. 
The user is expected to install these beforehand with the *system package manager*: :: From 8d747a536bddb6b33cfd1921417fea8d62139a27 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Sep 2015 16:53:24 -0700 Subject: [PATCH 37/64] non-lz4 code for benchmarking comparisons --- tests/test_browser.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_browser.py b/tests/test_browser.py index 805d956d3512f..f6e4e75209e47 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1051,6 +1051,18 @@ def test_fs_lz4fs_package(self): subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'file2.txt', 'file3.txt', '--separate-metadata', '--js-output=files.js']) self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '1', args=['-DLOAD_MANUALLY'], timeout=60) + '''# non-lz4 for comparison + try: + os.mkdir('files') + except: + pass + shutil.copyfile('file1.txt', os.path.join('files', 'file1.txt')) + shutil.copyfile('file2.txt', os.path.join('files', 'file2.txt')) + shutil.copyfile('file3.txt', os.path.join('files', 'file3.txt')) + out = subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'files/file1.txt', 'files/file2.txt', 'files/file3.txt']) + open('files.js', 'w').write(out) + self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '2', args=['--pre-js', 'files.js'], timeout=60)''' + def test_idbstore(self): secret = str(time.time()) for stage in [0, 1, 2, 3, 0, 1, 2, 0, 0, 1, 4, 2, 5]: From 9c3e519f9a2b57acb9db5f2d43e44d35fe1c649b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Sep 2015 17:31:52 -0700 Subject: [PATCH 38/64] FS.ensureFolder --- src/library_fs.js | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/library_fs.js b/src/library_fs.js index 61a8c65a861cb..29448b2d1b562 100644 --- a/src/library_fs.js +++ b/src/library_fs.js @@ -1545,6 +1545,17 @@ mergeInto(LibraryManager.library, { var path = PATH.join2(typeof parent === 'string' ? 
parent : FS.getPath(parent), name); return FS.symlink(target, path); }, + ensureFolder: function(path, canRead, canWrite) { + if (path === '/') return; + var mode = FS.getMode(canRead, canWrite); + var parts = path.split('/'); + for (var i = 2; i <= parts.length; i++) { + var curr = parts.slice(0, i).join('/'); + if (!FS.analyzePath(curr).object) { + FS.mkdir(curr, mode); + } + } + }, // Makes sure a file's contents are loaded. Returns whether the file has // been loaded successfully. No-op for files that have been loaded already. forceLoadFile: function(obj) { From 60500e85feaee26d14e04b3b177161ce0ecd40dd Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Sep 2015 17:32:17 -0700 Subject: [PATCH 39/64] move to a loadPackage method in lz4 compression, so we can load files even in the root, unlike FS.mount filesystems --- src/library_lz4fs.js | 55 +++++++++++++++-------------------------- tests/fs/test_lz4fs.cpp | 12 ++++----- tests/test_browser.py | 10 +++++--- tools/file_packager.py | 4 +-- 4 files changed, 33 insertions(+), 48 deletions(-) diff --git a/src/library_lz4fs.js b/src/library_lz4fs.js index 21429d0129810..dc5ce0d19f9ee 100644 --- a/src/library_lz4fs.js +++ b/src/library_lz4fs.js @@ -6,7 +6,7 @@ mergeInto(LibraryManager.library, { FILE_MODE: {{{ cDefine('S_IFREG') }}} | 511 /* 0777 */, CHUNK_SIZE: -1, LZ4: null, - mount: function (mount) { + loadPackage: function (pack) { if (!LZ4FS.LZ4) { LZ4FS.LZ4 = (function() { {{{ read('mini-lz4.js') }}}; @@ -14,46 +14,31 @@ mergeInto(LibraryManager.library, { })(); LZ4FS.CHUNK_SIZE = LZ4FS.LZ4.CHUNK_SIZE; } - var root = LZ4FS.createNode(null, '/', LZ4FS.DIR_MODE, 0); - var createdParents = {}; - function ensureParent(path) { - // return the parent node, creating subdirs as necessary - var parts = path.split('/'); - var parent = root; - for (var i = 0; i < parts.length-1; i++) { - var curr = parts.slice(0, i+1).join('/'); - if (!createdParents[curr]) { - createdParents[curr] = LZ4FS.createNode(parent, curr, 
LZ4FS.DIR_MODE, 0); - } - parent = createdParents[curr]; - } - return parent; - } function base(path) { var parts = path.split('/'); return parts[parts.length-1]; } - mount.opts["packages"].forEach(function(pack) { - var compressedData = pack['compressedData']; - if (!compressedData) compressedData = LZ4FS.LZ4.compressPackage(pack['data']); - assert(compressedData.cachedIndexes.length === compressedData.cachedChunks.length); - for (var i = 0; i < compressedData.cachedIndexes.length; i++) { - compressedData.cachedIndexes[i] = -1; - compressedData.cachedChunks[i] = compressedData.data.subarray(compressedData.cachedOffset + i*LZ4FS.CHUNK_SIZE, - compressedData.cachedOffset + (i+1)*LZ4FS.CHUNK_SIZE); - assert(compressedData.cachedChunks[i].length === LZ4FS.CHUNK_SIZE); - } - console.log('mounting package'); - pack['metadata'].files.forEach(function(file) { - var name = file.filename.substr(1); // remove initial slash - LZ4FS.createNode(ensureParent(name), base(name), LZ4FS.FILE_MODE, 0, { - compressedData: compressedData, - start: file.start, - end: file.end, - }); + var compressedData = pack['compressedData']; + if (!compressedData) compressedData = LZ4FS.LZ4.compressPackage(pack['data']); + assert(compressedData.cachedIndexes.length === compressedData.cachedChunks.length); + for (var i = 0; i < compressedData.cachedIndexes.length; i++) { + compressedData.cachedIndexes[i] = -1; + compressedData.cachedChunks[i] = compressedData.data.subarray(compressedData.cachedOffset + i*LZ4FS.CHUNK_SIZE, + compressedData.cachedOffset + (i+1)*LZ4FS.CHUNK_SIZE); + assert(compressedData.cachedChunks[i].length === LZ4FS.CHUNK_SIZE); + } + console.log('loading package'); + pack['metadata'].files.forEach(function(file) { + var dir = PATH.dirname(file.filename); + var name = PATH.basename(file.filename); + FS.ensureFolder(dir, true, true); + var parent = FS.analyzePath(dir).object; + LZ4FS.createNode(parent, name, LZ4FS.FILE_MODE, 0, { + compressedData: compressedData, + start: file.start, 
+ end: file.end, }); }); - return root; }, createNode: function (parent, name, mode, dev, contents, mtime) { var node = FS.createNode(parent, name, mode); diff --git a/tests/fs/test_lz4fs.cpp b/tests/fs/test_lz4fs.cpp index a689ecd17bae0..890da1d174995 100644 --- a/tests/fs/test_lz4fs.cpp +++ b/tests/fs/test_lz4fs.cpp @@ -17,11 +17,11 @@ void EMSCRIPTEN_KEEPALIVE finish() { int num; printf("load files\n"); - FILE *f1 = fopen("files/file1.txt", "r"); + FILE *f1 = fopen("file1.txt", "r"); assert(f1); - FILE *f2 = fopen("files/file2.txt", "r"); + FILE *f2 = fopen("subdir/file2.txt", "r"); assert(f2); - FILE *f3 = fopen("files/file3.txt", "r"); + FILE *f3 = fopen("file3.txt", "r"); assert(f3); FILE *files[] = { f1, f2, f3 }; double before = emscripten_get_now(); @@ -138,11 +138,9 @@ int main() { Module.print('loading into filesystem'); FS.mkdir('/files'); - var root = FS.mount(LZ4FS, { - packages: [{ metadata: meta, data: data }] - }, '/files'); + LZ4FS.loadPackage({ metadata: meta, data: data }); - Module.compressedData = root.contents['file1.txt'].contents.compressedData; + Module.compressedData = FS.root.contents['file1.txt'].contents.compressedData; var compressedSize = Module.compressedData.data.length; var low = COMPLETE_SIZE/3; var high = COMPLETE_SIZE/2; diff --git a/tests/test_browser.py b/tests/test_browser.py index f6e4e75209e47..9b9d0bd7a8086 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1034,21 +1034,25 @@ def test_fs_workerfs_package(self): def test_fs_lz4fs_package(self): # generate data import random + try: + os.mkdir('subdir') + except: + pass open('file1.txt', 'w').write('0123456789' * (1024*128)) - open('file2.txt', 'w').write('1234567890' * (1024*128)) + open(os.path.join('subdir', 'file2.txt'), 'w').write('1234567890' * (1024*128)) random_data = [chr(random.randint(0,255)) for x in range(1024*128*10 + 1)] random_data[17] = 'X' open('file3.txt', 'w').write(''.join(random_data)) # compress in the file packager, on the server. 
the client receives compressed data and can just use it. this is typical usage print 'normal' - out = subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'file2.txt', 'file3.txt', '--lz4=files']) + out = subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'subdir/file2.txt', 'file3.txt', '--lz4=files']) open('files.js', 'w').write(out) self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '2', args=['--pre-js', 'files.js'], timeout=60) # load the data into LZ4FS manually at runtime. This means we compress on the client. This is generally not recommended print 'manual' - subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'file2.txt', 'file3.txt', '--separate-metadata', '--js-output=files.js']) + subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'subdir/file2.txt', 'file3.txt', '--separate-metadata', '--js-output=files.js']) self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '1', args=['-DLOAD_MANUALLY'], timeout=60) '''# non-lz4 for comparison diff --git a/tools/file_packager.py b/tools/file_packager.py index 224bf16e0c3d2..f5b7a2d88f159 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -536,9 +536,7 @@ def was_seen(name): FS.mkdir('/' + LZ4_DIR); var compressedData = %s; compressedData.data = byteArray; - var root = FS.mount(LZ4FS, { - packages: [{ metadata: metadata, compressedData: compressedData }] - }, '/' + LZ4_DIR); + LZ4FS.loadPackage({ metadata: metadata, compressedData: compressedData }); Module['removeRunDependency']('datafile_%s'); ''' % (lz4, meta, data_target) From dd036a6577e4a29067502c7c893aa54a7b717952 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Sep 2015 17:49:55 -0700 Subject: [PATCH 40/64] move LZ4FS to LZ4, a subsystem with an init, instead of an FS --- src/library_fs.js | 3 +- src/{library_lz4fs.js => library_lz4.js} | 63 +++++++++++++----------- src/modules.js | 4 +- 
system/include/emscripten/emscripten.h | 4 ++ tests/fs/test_lz4fs.cpp | 4 +- tests/test_browser.py | 2 +- tools/file_packager.py | 20 ++++---- 7 files changed, 53 insertions(+), 47 deletions(-) rename src/{library_lz4fs.js => library_lz4.js} (81%) diff --git a/src/library_fs.js b/src/library_fs.js index 29448b2d1b562..cebe0fa7ad752 100644 --- a/src/library_fs.js +++ b/src/library_fs.js @@ -1,5 +1,5 @@ mergeInto(LibraryManager.library, { - $FS__deps: ['$ERRNO_CODES', '$ERRNO_MESSAGES', '__setErrNo', '$PATH', '$TTY', '$MEMFS', '$IDBFS', '$NODEFS', '$WORKERFS', '$LZ4FS', 'stdin', 'stdout', 'stderr'], + $FS__deps: ['$ERRNO_CODES', '$ERRNO_MESSAGES', '__setErrNo', '$PATH', '$TTY', '$MEMFS', '$IDBFS', '$NODEFS', '$WORKERFS', 'stdin', 'stdout', 'stderr'], $FS__postset: 'FS.staticInit();' + '__ATINIT__.unshift(function() { if (!Module["noFSInit"] && !FS.init.initialized) FS.init() });' + '__ATMAIN__.push(function() { FS.ignorePermissions = false });' + @@ -1354,7 +1354,6 @@ mergeInto(LibraryManager.library, { 'IDBFS': IDBFS, 'NODEFS': NODEFS, 'WORKERFS': WORKERFS, - 'LZ4FS': LZ4FS, }; }, init: function(input, output, error) { diff --git a/src/library_lz4fs.js b/src/library_lz4.js similarity index 81% rename from src/library_lz4fs.js rename to src/library_lz4.js index dc5ce0d19f9ee..8486a07f160fc 100644 --- a/src/library_lz4fs.js +++ b/src/library_lz4.js @@ -1,31 +1,29 @@ // TODO: put behind a flag mergeInto(LibraryManager.library, { - $LZ4FS__deps: ['$FS'], - $LZ4FS: { + $LZ4__deps: ['$FS'], + $LZ4: { DIR_MODE: {{{ cDefine('S_IFDIR') }}} | 511 /* 0777 */, FILE_MODE: {{{ cDefine('S_IFREG') }}} | 511 /* 0777 */, CHUNK_SIZE: -1, - LZ4: null, + codec: null, + init: function() { + if (LZ4.codec) return; + LZ4.codec = (function() { + {{{ read('mini-lz4.js') }}}; + return MiniLZ4; + })(); + LZ4.CHUNK_SIZE = LZ4.codec.CHUNK_SIZE; + }, loadPackage: function (pack) { - if (!LZ4FS.LZ4) { - LZ4FS.LZ4 = (function() { - {{{ read('mini-lz4.js') }}}; - return MiniLZ4; - })(); - 
LZ4FS.CHUNK_SIZE = LZ4FS.LZ4.CHUNK_SIZE; - } - function base(path) { - var parts = path.split('/'); - return parts[parts.length-1]; - } + LZ4.init(); var compressedData = pack['compressedData']; - if (!compressedData) compressedData = LZ4FS.LZ4.compressPackage(pack['data']); + if (!compressedData) compressedData = LZ4.codec.compressPackage(pack['data']); assert(compressedData.cachedIndexes.length === compressedData.cachedChunks.length); for (var i = 0; i < compressedData.cachedIndexes.length; i++) { compressedData.cachedIndexes[i] = -1; - compressedData.cachedChunks[i] = compressedData.data.subarray(compressedData.cachedOffset + i*LZ4FS.CHUNK_SIZE, - compressedData.cachedOffset + (i+1)*LZ4FS.CHUNK_SIZE); - assert(compressedData.cachedChunks[i].length === LZ4FS.CHUNK_SIZE); + compressedData.cachedChunks[i] = compressedData.data.subarray(compressedData.cachedOffset + i*LZ4.CHUNK_SIZE, + compressedData.cachedOffset + (i+1)*LZ4.CHUNK_SIZE); + assert(compressedData.cachedChunks[i].length === LZ4.CHUNK_SIZE); } console.log('loading package'); pack['metadata'].files.forEach(function(file) { @@ -33,7 +31,7 @@ mergeInto(LibraryManager.library, { var name = PATH.basename(file.filename); FS.ensureFolder(dir, true, true); var parent = FS.analyzePath(dir).object; - LZ4FS.createNode(parent, name, LZ4FS.FILE_MODE, 0, { + LZ4.createNode(parent, name, LZ4.FILE_MODE, 0, { compressedData: compressedData, start: file.start, end: file.end, @@ -43,11 +41,11 @@ mergeInto(LibraryManager.library, { createNode: function (parent, name, mode, dev, contents, mtime) { var node = FS.createNode(parent, name, mode); node.mode = mode; - node.node_ops = LZ4FS.node_ops; - node.stream_ops = LZ4FS.stream_ops; + node.node_ops = LZ4.node_ops; + node.stream_ops = LZ4.stream_ops; node.timestamp = (mtime || new Date).getTime(); - assert(LZ4FS.FILE_MODE !== LZ4FS.DIR_MODE); - if (mode === LZ4FS.FILE_MODE) { + assert(LZ4.FILE_MODE !== LZ4.DIR_MODE); + if (mode === LZ4.FILE_MODE) { node.size = contents.end - 
contents.start; node.contents = contents; } else { @@ -112,7 +110,7 @@ mergeInto(LibraryManager.library, { }, stream_ops: { read: function (stream, buffer, offset, length, position) { - //console.log('LZ4FS read ' + [offset, length, position]); + //console.log('LZ4 read ' + [offset, length, position]); length = Math.min(length, stream.node.size - position); if (length <= 0) return 0; var contents = stream.node.contents; @@ -122,7 +120,7 @@ mergeInto(LibraryManager.library, { var start = contents.start + position + written; // start index in uncompressed data var desired = length - written; //console.log('current read: ' + ['start', start, 'desired', desired]); - var chunkIndex = Math.floor(start / LZ4FS.CHUNK_SIZE); + var chunkIndex = Math.floor(start / LZ4.CHUNK_SIZE); var compressedStart = compressedData.offsets[chunkIndex]; var compressedSize = compressedData.sizes[chunkIndex]; var currChunk; @@ -142,16 +140,16 @@ mergeInto(LibraryManager.library, { } var compressed = compressedData.data.subarray(compressedStart, compressedStart + compressedSize); //var t = Date.now(); - var originalSize = LZ4FS.LZ4.uncompress(compressed, currChunk); + var originalSize = LZ4.codec.uncompress(compressed, currChunk); //console.log('decompress time: ' + (Date.now() - t)); - assert(originalSize === LZ4FS.CHUNK_SIZE); + assert(originalSize === LZ4.CHUNK_SIZE); } } else { // uncompressed - currChunk = compressedData.data.subarray(compressedStart, compressedStart + LZ4FS.CHUNK_SIZE); + currChunk = compressedData.data.subarray(compressedStart, compressedStart + LZ4.CHUNK_SIZE); } - var startInChunk = start % LZ4FS.CHUNK_SIZE; - var endInChunk = Math.min(startInChunk + desired, LZ4FS.CHUNK_SIZE); + var startInChunk = start % LZ4.CHUNK_SIZE; + var endInChunk = Math.min(startInChunk + desired, LZ4.CHUNK_SIZE); buffer.set(currChunk.subarray(startInChunk, endInChunk), offset + written); var currWritten = endInChunk - startInChunk; written += currWritten; @@ -177,4 +175,9 @@ 
mergeInto(LibraryManager.library, { }, }, }, + emscripten_init_lz4__deps: ['$LZ4'], + emscripten_init_lz4: function() { + LZ4.init(); + }, }); + diff --git a/src/modules.js b/src/modules.js index 0f376a260c4f7..b2b833f440b1c 100644 --- a/src/modules.js +++ b/src/modules.js @@ -110,8 +110,8 @@ var LibraryManager = { 'library_nodefs.js', 'library_sockfs.js', 'library_workerfs.js', - 'library_lz4fs.js', - 'library_tty.js' + 'library_tty.js', + 'library_lz4.js', ]); } if (!NO_BROWSER) { diff --git a/system/include/emscripten/emscripten.h b/system/include/emscripten/emscripten.h index 2fa9eb6a0e3ca..a3912bfa376d3 100644 --- a/system/include/emscripten/emscripten.h +++ b/system/include/emscripten/emscripten.h @@ -208,6 +208,10 @@ int emscripten_async_prepare(const char* file, em_str_callback_func onload, em_s typedef void (*em_async_prepare_data_onload_func)(void*, const char*); void emscripten_async_prepare_data(char* data, int size, const char *suffix, void *arg, em_async_prepare_data_onload_func onload, em_arg_callback_func onerror); +// init() methods for optional subsystems + +void emscripten_init_lz4(void); + // worker APIs typedef int worker_handle; diff --git a/tests/fs/test_lz4fs.cpp b/tests/fs/test_lz4fs.cpp index 890da1d174995..4fe88c4aa09ed 100644 --- a/tests/fs/test_lz4fs.cpp +++ b/tests/fs/test_lz4fs.cpp @@ -126,6 +126,8 @@ void EMSCRIPTEN_KEEPALIVE finish() { int main() { before_it_all = emscripten_get_now(); + emscripten_init_lz4(); + #if LOAD_MANUALLY EM_ASM({ var COMPLETE_SIZE = 10*1024*128*3; @@ -138,7 +140,7 @@ int main() { Module.print('loading into filesystem'); FS.mkdir('/files'); - LZ4FS.loadPackage({ metadata: meta, data: data }); + LZ4.loadPackage({ metadata: meta, data: data }); Module.compressedData = FS.root.contents['file1.txt'].contents.compressedData; var compressedSize = Module.compressedData.data.length; diff --git a/tests/test_browser.py b/tests/test_browser.py index 9b9d0bd7a8086..9c3ee7d94a394 100644 --- a/tests/test_browser.py +++ 
b/tests/test_browser.py @@ -1046,7 +1046,7 @@ def test_fs_lz4fs_package(self): # compress in the file packager, on the server. the client receives compressed data and can just use it. this is typical usage print 'normal' - out = subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'subdir/file2.txt', 'file3.txt', '--lz4=files']) + out = subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'subdir/file2.txt', 'file3.txt', '--lz4']) open('files.js', 'w').write(out) self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '2', args=['--pre-js', 'files.js'], timeout=60) diff --git a/tools/file_packager.py b/tools/file_packager.py index f5b7a2d88f159..4df7553b5fda2 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -11,7 +11,7 @@ Usage: - file_packager.py TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]] [--compress COMPRESSION_DATA] [--crunch[=X]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--no-heap-copy] [--separate-metadata] [--lz4=DIR_NAME] + file_packager.py TARGET [--preload A [B..]] [--embed C [D..]] [--exclude E [F..]] [--compress COMPRESSION_DATA] [--crunch[=X]] [--js-output=OUTPUT.js] [--no-force] [--use-preload-cache] [--no-heap-copy] [--separate-metadata] [--lz4] --preload , --embed See emcc --help for more details on those options. @@ -40,9 +40,8 @@ --separate-metadata Stores package metadata separately. Only applicable when preloading and js-output file is specified. - --lz4=DIR_NAME Uses LZ4FS. This compresses the data using LZ4 when this utility is run, then the client decompresses chunks on the fly, avoiding storing - the entire decompressed data in memory at once. DIR_NAME is the name of a directory to mount the files under (LZ4FS cannot be mounted - on /, it has to be a subdirectory) + --lz4 Uses LZ4. 
This compresses the data using LZ4 when this utility is run, then the client decompresses chunks on the fly, avoiding storing + the entire decompressed data in memory at once. Notes: @@ -100,7 +99,7 @@ # If set to True, the package metadata is stored separately from js-output file which makes js-output file immutable to the package content changes. # If set to False, the package metadata is stored inside the js-output file which makes js-output file to mutate on each invocation of this packager tool. separate_metadata = False -lz4 = None +lz4 = False for arg in sys.argv[2:]: if arg == '--preload': @@ -126,8 +125,8 @@ elif arg == '--separate-metadata': separate_metadata = True leading = '' - elif arg.startswith('--lz4='): - lz4 = arg.split('=')[1] + elif arg == '--lz4': + lz4 = True leading = '' elif arg.startswith('--js-output'): jsoutput = arg.split('=')[1] if '=' in arg else None @@ -532,13 +531,12 @@ def was_seen(name): meta = run_js(shared.path_from_root('tools', 'lz4-compress.js'), shared.NODE_JS, [shared.path_from_root('src', 'mini-lz4.js'), temp, data_target], stdout=PIPE) os.unlink(temp) use_data = ''' - var LZ4_DIR = '%s'; - FS.mkdir('/' + LZ4_DIR); var compressedData = %s; compressedData.data = byteArray; - LZ4FS.loadPackage({ metadata: metadata, compressedData: compressedData }); + assert(typeof LZ4 === 'object', 'LZ4 not present - does your app call emscripten_init_lz4(), which should ensure LZ4 is linked in?'); + LZ4.loadPackage({ metadata: metadata, compressedData: compressedData }); Module['removeRunDependency']('datafile_%s'); - ''' % (lz4, meta, data_target) + ''' % (meta, data_target) package_uuid = uuid.uuid4(); package_name = Compression.compressed_name(data_target) if Compression.on else data_target From 4135c57a075cd9821a9ed70bb726703d074ea9c7 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Sep 2015 18:24:35 -0700 Subject: [PATCH 41/64] add lz4 verification as an option --- src/mini-lz4.js | 12 +++++++++++- 1 file changed, 11 
insertions(+), 1 deletion(-) diff --git a/src/mini-lz4.js b/src/mini-lz4.js index 4d7553f51a9c4..6cbb16b23cd59 100644 --- a/src/mini-lz4.js +++ b/src/mini-lz4.js @@ -271,7 +271,10 @@ function compressBlock (src, dst, pos, hashTable, sIdx, eIdx) { exports.CHUNK_SIZE = 2048; // musl libc does readaheads of 1024 bytes, so a multiple of that is a good idea -exports.compressPackage = function(data) { +exports.compressPackage = function(data, verify) { + if (verify) { + var temp = new Uint8Array(exports.CHUNK_SIZE); + } // compress the data in chunks assert(data instanceof ArrayBuffer); data = new Uint8Array(data); @@ -293,6 +296,13 @@ exports.compressPackage = function(data) { compressedChunks.push(compressed); total += compressedSize; successes.push(1); + if (verify) { + var back = exports.uncompress(compressed, temp); + assert(back === chunk.length, [back, chunk.length]); + for (var i = 0; i < chunk.length; i++) { + assert(chunk[i] === temp[i]); + } + } } else { assert(compressedSize === 0); // failure to compress :( From 0e7569342970add31b9ec1ee52d418694ceb7c5d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Sep 2015 18:34:18 -0700 Subject: [PATCH 42/64] note in header --- system/include/emscripten/emscripten.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system/include/emscripten/emscripten.h b/system/include/emscripten/emscripten.h index a3912bfa376d3..913384fc091ad 100644 --- a/system/include/emscripten/emscripten.h +++ b/system/include/emscripten/emscripten.h @@ -210,7 +210,7 @@ void emscripten_async_prepare_data(char* data, int size, const char *suffix, voi // init() methods for optional subsystems -void emscripten_init_lz4(void); +void emscripten_init_lz4(void); // see file_packager.py // worker APIs From b1284bcce27787743d68d20fc7e381b87ccafdc3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Sep 2015 18:34:41 -0700 Subject: [PATCH 43/64] better assertions in lz4 compressor --- tools/lz4-compress.js | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/tools/lz4-compress.js b/tools/lz4-compress.js index 1732b6352910e..cc4fa466b0521 100644 --- a/tools/lz4-compress.js +++ b/tools/lz4-compress.js @@ -115,8 +115,8 @@ if (typeof print === 'undefined') { this['print'] = printErr; } -assert = function(x) { - if (!x) throw 'assertion failed ' + new Error().stack; +assert = function(x, message) { + if (!x) throw 'assertion failed: ' + message + ' : ' + new Error().stack; } if (!Math['imul'] || Math['imul'](0xffffffff, 5) !== -5) Math['imul'] = function imul(a, b) { From b13d11ec74d513c6ad90cd4beeca3d513d30eac8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 7 Sep 2015 18:34:58 -0700 Subject: [PATCH 44/64] note on LZ4 compression in file packager --- tools/file_packager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/file_packager.py b/tools/file_packager.py index 4df7553b5fda2..b2a5908459250 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -42,6 +42,8 @@ --lz4 Uses LZ4. This compresses the data using LZ4 when this utility is run, then the client decompresses chunks on the fly, avoiding storing the entire decompressed data in memory at once. + Limitations: LZ4-compressed files are only decompressed when needed, so they are not ready to be processed during startup, for + preloading of images using browser codecs, for example. Notes: From bbd8c98a082a95e8bf8f7090fec5d7b316922e74 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Sep 2015 10:47:17 -0700 Subject: [PATCH 45/64] rename to createFolders, and throw on error --- src/library_fs.js | 10 ++++++++-- src/library_lz4.js | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/library_fs.js b/src/library_fs.js index cebe0fa7ad752..5ca6db4045363 100644 --- a/src/library_fs.js +++ b/src/library_fs.js @@ -1544,14 +1544,20 @@ mergeInto(LibraryManager.library, { var path = PATH.join2(typeof parent === 'string' ?
parent : FS.getPath(parent), name); return FS.symlink(target, path); }, - ensureFolder: function(path, canRead, canWrite) { + // Creates all necessary folders so that path exists and is a directory, if they do + // not already exist. + // If a non-directory blocks doing so, throws EEXIST. + createFolders: function(path, canRead, canWrite) { if (path === '/') return; var mode = FS.getMode(canRead, canWrite); var parts = path.split('/'); for (var i = 2; i <= parts.length; i++) { var curr = parts.slice(0, i).join('/'); - if (!FS.analyzePath(curr).object) { + var seen = FS.analyzePath(curr).object; + if (!seen) { FS.mkdir(curr, mode); + } else if (!FS.isDir(seen.mode)) { + throw new FS.ErrnoError(ERRNO_CODES.EEXIST); } } }, diff --git a/src/library_lz4.js b/src/library_lz4.js index 8486a07f160fc..ed9e9bd3408d7 100644 --- a/src/library_lz4.js +++ b/src/library_lz4.js @@ -29,7 +29,7 @@ mergeInto(LibraryManager.library, { pack['metadata'].files.forEach(function(file) { var dir = PATH.dirname(file.filename); var name = PATH.basename(file.filename); - FS.ensureFolder(dir, true, true); + FS.createFolders(dir, true, true); var parent = FS.analyzePath(dir).object; LZ4.createNode(parent, name, LZ4.FILE_MODE, 0, { compressedData: compressedData, From 55a83c5ac0d2d8f66c51e56dc6d093635d322493 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Sep 2015 10:53:55 -0700 Subject: [PATCH 46/64] use LZ4=1 to enable lz4 compression --- src/library_lz4.js | 8 +++----- src/settings.js | 9 +++++++++ system/include/emscripten/emscripten.h | 4 ---- tests/fs/test_lz4fs.cpp | 2 -- tests/test_browser.py | 4 ++-- tools/file_packager.py | 6 ++---- 6 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/library_lz4.js b/src/library_lz4.js index ed9e9bd3408d7..c5fb717d09aad 100644 --- a/src/library_lz4.js +++ b/src/library_lz4.js @@ -1,4 +1,4 @@ -// TODO: put behind a flag +#if LZ4 mergeInto(LibraryManager.library, { $LZ4__deps: ['$FS'], $LZ4: { @@ -175,9 +175,7 @@ 
mergeInto(LibraryManager.library, { }, }, }, - emscripten_init_lz4__deps: ['$LZ4'], - emscripten_init_lz4: function() { - LZ4.init(); - }, }); +LibraryManager.library['$FS__deps'].push('$LZ4'); // LZ4=1, so auto-include us +#endif diff --git a/src/settings.js b/src/settings.js index 068852f458aee..510068299dc28 100644 --- a/src/settings.js +++ b/src/settings.js @@ -247,6 +247,15 @@ var STB_IMAGE = 0; // Enables building of stb-image, a tiny public-domain librar // When enabled, stb-image will be used automatically from IMG_Load and IMG_Load_RW. You // can also call the stbi_* functions directly yourself. +var LZ4 = 0; // Enable this to support lz4-compressed file packages. They are stored compressed in memory, and + // decompressed on the fly, avoiding storing the entire decompressed data in memory at once. + // You can precompress a file package using --lz4 in the file packager, or compress one at + // runtime, using LZ4.loadPackage(); + // Limitations: LZ4-compressed files are only decompressed when needed, so they are not available + // for special preloading operations like pre-decoding of images using browser codecs, + // preloadPlugin stuff, etc. + + var DISABLE_EXCEPTION_CATCHING = 0; // Disables generating code to actually catch exceptions. 
If the code you // are compiling does not actually rely on catching exceptions (but the // compiler generates code for it, maybe because of stdlibc++ stuff), diff --git a/system/include/emscripten/emscripten.h b/system/include/emscripten/emscripten.h index 913384fc091ad..2fa9eb6a0e3ca 100644 --- a/system/include/emscripten/emscripten.h +++ b/system/include/emscripten/emscripten.h @@ -208,10 +208,6 @@ int emscripten_async_prepare(const char* file, em_str_callback_func onload, em_s typedef void (*em_async_prepare_data_onload_func)(void*, const char*); void emscripten_async_prepare_data(char* data, int size, const char *suffix, void *arg, em_async_prepare_data_onload_func onload, em_arg_callback_func onerror); -// init() methods for optional subsystems - -void emscripten_init_lz4(void); // see file_packager.py - // worker APIs typedef int worker_handle; diff --git a/tests/fs/test_lz4fs.cpp b/tests/fs/test_lz4fs.cpp index 4fe88c4aa09ed..7d05c57e0f1f0 100644 --- a/tests/fs/test_lz4fs.cpp +++ b/tests/fs/test_lz4fs.cpp @@ -126,8 +126,6 @@ void EMSCRIPTEN_KEEPALIVE finish() { int main() { before_it_all = emscripten_get_now(); - emscripten_init_lz4(); - #if LOAD_MANUALLY EM_ASM({ var COMPLETE_SIZE = 10*1024*128*3; diff --git a/tests/test_browser.py b/tests/test_browser.py index 9c3ee7d94a394..52460773fd927 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1048,12 +1048,12 @@ def test_fs_lz4fs_package(self): print 'normal' out = subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'subdir/file2.txt', 'file3.txt', '--lz4']) open('files.js', 'w').write(out) - self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '2', args=['--pre-js', 'files.js'], timeout=60) + self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '2', args=['--pre-js', 'files.js', '-s', 'LZ4=1'], timeout=60) # load the data into LZ4FS manually at runtime. This means we compress on the client. 
This is generally not recommended print 'manual' subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'subdir/file2.txt', 'file3.txt', '--separate-metadata', '--js-output=files.js']) - self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '1', args=['-DLOAD_MANUALLY'], timeout=60) + self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '1', args=['-DLOAD_MANUALLY', '-s', 'LZ4=1'], timeout=60) '''# non-lz4 for comparison try: diff --git a/tools/file_packager.py b/tools/file_packager.py index b2a5908459250..75b7ce4a6612f 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -41,9 +41,7 @@ --separate-metadata Stores package metadata separately. Only applicable when preloading and js-output file is specified. --lz4 Uses LZ4. This compresses the data using LZ4 when this utility is run, then the client decompresses chunks on the fly, avoiding storing - the entire decompressed data in memory at once. - Limitations: LZ4-compressed files are only decompressed when needed, so they are not ready to be processed during startup, for - preloading of images using browser codecs, for example. + the entire decompressed data in memory at once. See LZ4 in src/settings.js, you must build the main program with that flag. 
Notes: @@ -535,7 +533,7 @@ def was_seen(name): use_data = ''' var compressedData = %s; compressedData.data = byteArray; - assert(typeof LZ4 === 'object', 'LZ4 not present - does your app call emscripten_init_lz4(), which should ensure LZ4 is linked in?'); + assert(typeof LZ4 === 'object', 'LZ4 not present - was your app build with -s LZ4=1 ?'); LZ4.loadPackage({ metadata: metadata, compressedData: compressedData }); Module['removeRunDependency']('datafile_%s'); ''' % (meta, data_target) From 262b13668882aee8363031be9219b1f73a6fae79 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Sep 2015 11:03:34 -0700 Subject: [PATCH 47/64] closure support for lz4 --- tests/fs/test_lz4fs.cpp | 12 ++++++------ tests/test_browser.py | 6 ++++++ tools/file_packager.py | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/fs/test_lz4fs.cpp b/tests/fs/test_lz4fs.cpp index 7d05c57e0f1f0..d50efe345f474 100644 --- a/tests/fs/test_lz4fs.cpp +++ b/tests/fs/test_lz4fs.cpp @@ -69,7 +69,7 @@ void EMSCRIPTEN_KEEPALIVE finish() { ret = fseek(f3, TOTAL_SIZE - 5000, SEEK_SET); assert(ret == 0); num = fread(buffer, 1, 1, f3); assert(num == 1); // also near the end EM_ASM({ - assert(!Module.decompressedChunks); + assert(!Module['decompressedChunks']); Module.compressedData.debug = true; console.log('last cached indexes ' + Module.compressedData.cachedIndexes); assert(Module.compressedData.cachedIndexes.indexOf(0) < 0); // 0 is not cached @@ -82,7 +82,7 @@ void EMSCRIPTEN_KEEPALIVE finish() { assert(num == 1); } EM_ASM({ - assert(Module.decompressedChunks == 1, ['seeing', Module.decompressedChunks, 'decompressed chunks']); + assert(Module['decompressedChunks'] == 1, ['seeing', Module['decompressedChunks'], 'decompressed chunks']); }); printf("multiple reads of adjoining byte\n"); for (int i = 0; i < 100; i++) { @@ -92,7 +92,7 @@ void EMSCRIPTEN_KEEPALIVE finish() { assert(num == 1); } EM_ASM({ - assert(Module.decompressedChunks == 1, ['seeing', Module.decompressedChunks, 
'decompressed chunks']); + assert(Module['decompressedChunks'] == 1, ['seeing', Module['decompressedChunks'], 'decompressed chunks']); }); printf("multiple reads across two chunks\n"); for (int i = 0; i < 2100; i++) { @@ -102,7 +102,7 @@ void EMSCRIPTEN_KEEPALIVE finish() { assert(num == 1); } EM_ASM({ - assert(Module.decompressedChunks == 2, ['seeing', Module.decompressedChunks, 'decompressed chunks']); + assert(Module['decompressedChunks'] == 2, ['seeing', Module['decompressedChunks'], 'decompressed chunks']); }); printf("caching test ok\n"); #endif @@ -138,7 +138,7 @@ int main() { Module.print('loading into filesystem'); FS.mkdir('/files'); - LZ4.loadPackage({ metadata: meta, data: data }); + LZ4.loadPackage({ 'metadata': meta, 'data': data }); Module.compressedData = FS.root.contents['file1.txt'].contents.compressedData; var compressedSize = Module.compressedData.data.length; @@ -147,7 +147,7 @@ int main() { console.log('seeing compressed size of ' + compressedSize + ', expect in ' + [low, high]); assert(compressedSize > low && compressedSize < high); // more than 1/3, because 1/3 is uncompressible, but still, less than 1/2 - Module.ccall('finish'); + Module['ccall']('finish'); } var meta_xhr = new XMLHttpRequest(); diff --git a/tests/test_browser.py b/tests/test_browser.py index 52460773fd927..fd3181e4dc5b8 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1049,11 +1049,17 @@ def test_fs_lz4fs_package(self): out = subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'subdir/file2.txt', 'file3.txt', '--lz4']) open('files.js', 'w').write(out) self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '2', args=['--pre-js', 'files.js', '-s', 'LZ4=1'], timeout=60) + print ' opts' + self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '2', args=['--pre-js', 'files.js', '-s', 'LZ4=1', '-O2'], timeout=60) # load the data into LZ4FS manually at runtime. This means we compress on the client. 
This is generally not recommended print 'manual' subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'subdir/file2.txt', 'file3.txt', '--separate-metadata', '--js-output=files.js']) self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '1', args=['-DLOAD_MANUALLY', '-s', 'LZ4=1'], timeout=60) + print ' opts' + self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '1', args=['-DLOAD_MANUALLY', '-s', 'LZ4=1', '-O2'], timeout=60) + print ' opts+closure' + self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '1', args=['-DLOAD_MANUALLY', '-s', 'LZ4=1', '-O2', '--closure', '1', '-g1'], timeout=60) '''# non-lz4 for comparison try: diff --git a/tools/file_packager.py b/tools/file_packager.py index 75b7ce4a6612f..871b193a282cb 100644 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -534,7 +534,7 @@ def was_seen(name): var compressedData = %s; compressedData.data = byteArray; assert(typeof LZ4 === 'object', 'LZ4 not present - was your app build with -s LZ4=1 ?'); - LZ4.loadPackage({ metadata: metadata, compressedData: compressedData }); + LZ4.loadPackage({ 'metadata': metadata, 'compressedData': compressedData }); Module['removeRunDependency']('datafile_%s'); ''' % (meta, data_target) From 0f004f22d7036fcad8220047586b2ad7d9250dc4 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Sep 2015 11:46:03 -0700 Subject: [PATCH 48/64] improve EM_ASM docs --- .../Interacting-with-code.rst | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/site/source/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.rst b/site/source/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.rst index 13a6622e73a90..6c4869469f994 100644 --- a/site/source/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.rst +++ b/site/source/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.rst @@ -221,9 +221,18 @@ an alert, followed by an exception. 
(Note, however, that under the hood Emscripten still does a function call even in this case, which has some amount of overhead.) -You can also send values from C into JavaScript inside :c:macro:`EM_ASM_`, -as well as receive values back (see the :c:macro:`linked macro ` -for details. The following example will print out ``I received: 100`` +You can also send values from C into JavaScript inside :c:macro:`EM_ASM_` +(note the extra "_" at the end), for example + +.. code-block:: cpp + + EM_ASM_({ + Module.print('I received: ' + $0); + }, 100); + +This will show ``I received: 100``. + +You can also receive values back, for example the following will print out ``I received: 100`` and then ``101``. .. code-block:: cpp @@ -234,6 +243,8 @@ and then ``101``. }, 100); printf("%d\n", x); +See the :c:macro:`emscripten.h docs ` for more details. + .. note:: - You need to specify if the return value is an ``int`` or a ``double`` From 0438638795cfdfb85802b8e7bc90acf9d6c49b80 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 4 Sep 2015 10:48:19 -0700 Subject: [PATCH 49/64] fix another worker race condition #3741 --- src/postamble.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/postamble.js b/src/postamble.js index 80172de8a085b..fb32fa7e0ccb8 100644 --- a/src/postamble.js +++ b/src/postamble.js @@ -351,10 +351,11 @@ var workerResponded = false, workerCallbackId = -1; function flushMessages() { if (!messageBuffer) return; if (runtimeInitialized) { - messageBuffer.forEach(function(message) { + var temp = messageBuffer; + messageBuffer = null; + temp.forEach(function(message) { onmessage(message); }); - messageBuffer = null; } } From e8911cc36f13631c7008dba93d20af06bbfc40b5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Sep 2015 15:26:43 -0700 Subject: [PATCH 50/64] the last lz4 chunk might not be full-size --- src/library_lz4.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/library_lz4.js b/src/library_lz4.js index 
c5fb717d09aad..f4a7d51c26ada 100644 --- a/src/library_lz4.js +++ b/src/library_lz4.js @@ -142,7 +142,7 @@ mergeInto(LibraryManager.library, { //var t = Date.now(); var originalSize = LZ4.codec.uncompress(compressed, currChunk); //console.log('decompress time: ' + (Date.now() - t)); - assert(originalSize === LZ4.CHUNK_SIZE); + if (chunkIndex < compressedData.successes.length-1) assert(originalSize === LZ4.CHUNK_SIZE); // all but the last chunk must be full-size } } else { // uncompressed From dcbb09ea26fea204f762d8b59b415343d3708464 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Sep 2015 15:38:36 -0700 Subject: [PATCH 51/64] clean up test_fs_lz4fs_package --- tests/test_browser.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/test_browser.py b/tests/test_browser.py index fd3181e4dc5b8..c65cae1262308 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1034,10 +1034,8 @@ def test_fs_workerfs_package(self): def test_fs_lz4fs_package(self): # generate data import random - try: - os.mkdir('subdir') - except: - pass + try_delete('subdir') + os.mkdir('subdir') open('file1.txt', 'w').write('0123456789' * (1024*128)) open(os.path.join('subdir', 'file2.txt'), 'w').write('1234567890' * (1024*128)) random_data = [chr(random.randint(0,255)) for x in range(1024*128*10 + 1)] From 0ccf5913215714d5eacd011f1a997cdc038f92fd Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Sep 2015 15:41:31 -0700 Subject: [PATCH 52/64] use existing createPath, no need for new createFolders --- src/library_fs.js | 17 ----------------- src/library_lz4.js | 2 +- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/src/library_fs.js b/src/library_fs.js index 5ca6db4045363..1f576c64b515d 100644 --- a/src/library_fs.js +++ b/src/library_fs.js @@ -1544,23 +1544,6 @@ mergeInto(LibraryManager.library, { var path = PATH.join2(typeof parent === 'string' ? 
parent : FS.getPath(parent), name); return FS.symlink(target, path); }, - // Creates all necessary folders so that path exists and is a directory, if they do - // not already exist. - // If a non-directory blocks doing so, throws EEXIST. - createFolders: function(path, canRead, canWrite) { - if (path === '/') return; - var mode = FS.getMode(canRead, canWrite); - var parts = path.split('/'); - for (var i = 2; i <= parts.length; i++) { - var curr = parts.slice(0, i).join('/'); - var seen = FS.analyzePath(curr).object; - if (!seen) { - FS.mkdir(curr, mode); - } else if (!FS.isDir(seen.mode)) { - throw new FS.ErrnoError(ERRNO_CODES.EEXIST); - } - } - }, // Makes sure a file's contents are loaded. Returns whether the file has // been loaded successfully. No-op for files that have been loaded already. forceLoadFile: function(obj) { diff --git a/src/library_lz4.js b/src/library_lz4.js index f4a7d51c26ada..1c46c4601886d 100644 --- a/src/library_lz4.js +++ b/src/library_lz4.js @@ -29,7 +29,7 @@ mergeInto(LibraryManager.library, { pack['metadata'].files.forEach(function(file) { var dir = PATH.dirname(file.filename); var name = PATH.basename(file.filename); - FS.createFolders(dir, true, true); + FS.createPath('', dir, true, true); var parent = FS.analyzePath(dir).object; LZ4.createNode(parent, name, LZ4.FILE_MODE, 0, { compressedData: compressedData, From 281e0661c1c876d9793bac420e2c9191f3aae30e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Sep 2015 16:12:17 -0700 Subject: [PATCH 53/64] mention that lz4 files are read only --- src/settings.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/settings.js b/src/settings.js index f794211fc1cc7..51cf487f19ecf 100644 --- a/src/settings.js +++ b/src/settings.js @@ -251,10 +251,11 @@ var LZ4 = 0; // Enable this to support lz4-compressed file packages. They are st // decompressed on the fly, avoiding storing the entire decompressed data in memory at once. 
// You can precompress a file package using --lz4 in the file packager, or compress one at // runtime, using LZ4.loadPackage(); - // Limitations: LZ4-compressed files are only decompressed when needed, so they are not available - // for special preloading operations like pre-decoding of images using browser codecs, - // preloadPlugin stuff, etc. - + // Limitations: + // * LZ4-compressed files are only decompressed when needed, so they are not available + // for special preloading operations like pre-decoding of images using browser codecs, + // preloadPlugin stuff, etc. + // * LZ4 files are read-only. var DISABLE_EXCEPTION_CATCHING = 0; // Disables generating code to actually catch exceptions. If the code you // are compiling does not actually rely on catching exceptions (but the From 68b8bfa82a1d45d8ba941994fb651f37c19587d1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Sep 2015 16:16:12 -0700 Subject: [PATCH 54/64] more lz4 docs --- src/settings.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/settings.js b/src/settings.js index 51cf487f19ecf..5c985032843d2 100644 --- a/src/settings.js +++ b/src/settings.js @@ -249,8 +249,9 @@ var STB_IMAGE = 0; // Enables building of stb-image, a tiny public-domain librar var LZ4 = 0; // Enable this to support lz4-compressed file packages. They are stored compressed in memory, and // decompressed on the fly, avoiding storing the entire decompressed data in memory at once. - // You can precompress a file package using --lz4 in the file packager, or compress one at - // runtime, using LZ4.loadPackage(); + // You can compress a file package using --lz4 in the file packager. + // (You can also manually compress one on the client, using LZ4.loadPackage(), but doing it ahead of + // time in the file packager is generally better.) 
// Limitations: // * LZ4-compressed files are only decompressed when needed, so they are not available // for special preloading operations like pre-decoding of images using browser codecs, From 24a7d2294ab2324e3d0880a8747169a3f957d318 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Sep 2015 16:40:10 -0700 Subject: [PATCH 55/64] remove some broken test code --- tests/test_other.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_other.py b/tests/test_other.py index eb310bba3a0ba..2e484aa9b39ca 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -2091,10 +2091,6 @@ def clean(txt): out = clean(out) out2 = clean(out2) assert out == out2 - # sanity check that we do generate different code for different inputs - out3, err3 = Popen([PYTHON, FILE_PACKAGER, 'test.data', '--preload', 'data2.txt', 'data2.txt@waka.txt'], stdout=PIPE, stderr=PIPE).communicate() - out3 = clean(out3) - assert out != out3 # verify '--separate-metadata' option produces separate metadata file os.chdir('..') Popen([PYTHON, FILE_PACKAGER, 'test.data', '--preload', 'data1.txt', '--preload', 'subdir/data2.txt', '--js-output=immutable.js', '--separate-metadata']).communicate() From dda99522dc08fa9c99f297d5f9a853ed9a3105f3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Sep 2015 17:05:35 -0700 Subject: [PATCH 56/64] LZ4 in emcc tells it to add --lz4 to the file packager --- emcc | 2 ++ tests/test_browser.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/emcc b/emcc index e6f3317d26ae8..df741b30eefdf 100755 --- a/emcc +++ b/emcc @@ -1333,6 +1333,8 @@ try: file_args.append('--no-heap-copy') if not use_closure_compiler: file_args.append('--no-closure') + if shared.Settings.LZ4: + file_args.append('--lz4') file_code = execute([shared.PYTHON, shared.FILE_PACKAGER, unsuffixed(target) + '.data'] + file_args, stdout=PIPE)[0] pre_js = file_code + pre_js diff --git a/tests/test_browser.py b/tests/test_browser.py index 7353341eff489..f2d9b95f6727b 100644 --- 
a/tests/test_browser.py +++ b/tests/test_browser.py @@ -1042,6 +1042,12 @@ def test_fs_lz4fs_package(self): random_data[17] = 'X' open('file3.txt', 'w').write(''.join(random_data)) + # compress in emcc, -s LZ4=1 tells it to tell the file packager + print 'emcc-normal' + self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '2', args=['-s', 'LZ4=1', '--preload-file', 'file1.txt', '--preload-file', 'subdir/file2.txt', '--preload-file', 'file3.txt'], timeout=60) + print ' emcc-opts' + self.btest(os.path.join('fs', 'test_lz4fs.cpp'), '2', args=['-s', 'LZ4=1', '--preload-file', 'file1.txt', '--preload-file', 'subdir/file2.txt', '--preload-file', 'file3.txt', '-O2'], timeout=60) + # compress in the file packager, on the server. the client receives compressed data and can just use it. this is typical usage print 'normal' out = subprocess.check_output([PYTHON, FILE_PACKAGER, 'files.data', '--preload', 'file1.txt', 'subdir/file2.txt', 'file3.txt', '--lz4']) From 71bf8ed55ed03a96a018f76bf2003acf32db3235 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Sep 2015 17:07:52 -0700 Subject: [PATCH 57/64] update lz4 docs --- src/settings.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/settings.js b/src/settings.js index 5c985032843d2..d59637d273469 100644 --- a/src/settings.js +++ b/src/settings.js @@ -249,9 +249,10 @@ var STB_IMAGE = 0; // Enables building of stb-image, a tiny public-domain librar var LZ4 = 0; // Enable this to support lz4-compressed file packages. They are stored compressed in memory, and // decompressed on the fly, avoiding storing the entire decompressed data in memory at once. - // You can compress a file package using --lz4 in the file packager. - // (You can also manually compress one on the client, using LZ4.loadPackage(), but doing it ahead of - // time in the file packager is generally better.) 
+ // If you run the file packager separately, you still need to build the main program with this flag, + // and also pass --lz4 to the file packager. + // (You can also manually compress one on the client, using LZ4.loadPackage(), but that is less + // recommended.) // Limitations: // * LZ4-compressed files are only decompressed when needed, so they are not available // for special preloading operations like pre-decoding of images using browser codecs, From c500a84c17fe29ce6997dab4fdf6f18da2b6bfcf Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 8 Sep 2015 17:54:15 -0700 Subject: [PATCH 58/64] emcc --separate-asm option --- emcc | 23 +++++++++++++++++++ .../docs/optimizing/Optimizing-Code.rst | 6 ++++- site/source/docs/tools_reference/emcc.rst | 3 +++ tests/test_browser.py | 7 ++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/emcc b/emcc index df741b30eefdf..bfbd61a6d4cee 100755 --- a/emcc +++ b/emcc @@ -415,6 +415,7 @@ try: proxy_to_worker = False default_object_extension = '.o' valid_abspaths = [] + separate_asm = False def is_valid_abspath(path_name): # Any path that is underneath the emscripten repository root must be ok. 
@@ -648,6 +649,9 @@ try: valid_abspaths.append(newargs[i+1]) newargs[i] = '' newargs[i+1] = '' + elif newargs[i] == '--separate-asm': + separate_asm = True + newargs[i] = '' elif newargs[i].startswith(('-I', '-L')): path_name = newargs[i][2:] if not absolute_warning_shown and os.path.isabs(path_name) and not is_valid_abspath(path_name): @@ -845,6 +849,8 @@ try: assert not (Compression.on and final_suffix != 'html'), 'Compression only works when generating HTML' + assert not (separate_asm and final_suffix != 'html'), '--separate-asm requires building to HTML' + # If we are using embind and generating JS, now is the time to link in bind.cpp if bind and final_suffix in JS_CONTAINING_SUFFIXES: input_files.append((next_arg_index, shared.path_from_root('system', 'lib', 'embind', 'bind.cpp'))) @@ -1854,6 +1860,23 @@ try { })(); ''' % os.path.basename(memfile)) + script_inline + if separate_asm: + un_src() + asm_target = js_target[:-3] + '.asm.js' + temp_target = misc_temp_files.get(suffix='.js').name + execute([shared.PYTHON, shared.path_from_root('tools', 'separate_asm.py'), js_target, asm_target, temp_target]) + shutil.move(temp_target, js_target) + script_inline = ''' + var script = document.createElement('script'); + script.src = "%s"; + script.onload = function() { + setTimeout(function() { + %s + }, 1); // delaying even 1ms is enough to allow compilation memory to be reclaimed + }; + document.body.appendChild(script); +''' % (os.path.basename(asm_target), script_inline) + html = open(target, 'w') assert (script_src or script_inline) and not (script_src and script_inline) if script_src: diff --git a/site/source/docs/optimizing/Optimizing-Code.rst b/site/source/docs/optimizing/Optimizing-Code.rst index 3dc36b6e45378..13f6e40b4d4fc 100644 --- a/site/source/docs/optimizing/Optimizing-Code.rst +++ b/site/source/docs/optimizing/Optimizing-Code.rst @@ -91,12 +91,16 @@ Very large codebases The previous section on reducing code size can be helpful on very large codebases. 
In addition, here are some other topics that might be useful. +.. _optimizing-code-separating_asm: + Avoid memory spikes by separating out asm.js -------------------------------------------- By default Emscripten emits one JS file, containing the entire codebase: Both the asm.js code that was compiled, and the general code that sets up the environment, connects to browser APIs, etc. in a very large codebase, this can be inefficient in terms of memory usage, as having all of that in one script means the JS engine might use some memory to parse and compile the asm.js, and might not free it before starting to run the codebase. And in a large game, starting to run the code might allocate a large typed array for memory, so you might see a "spike" of memory, after which temporary compilation memory will be freed. And if big enough, that spike can cause the browser to run out of memory and fail to load the application. This is a known problem on `Chrome `_ (other browsers do not seem to have this issue). -A workaround is to separate out the asm.js into another file, and to make sure that the browser has a turn of the event loop between compiling the asm.js module and starting to run the application. This can be achieved as follows: +A workaround is to separate out the asm.js into another file, and to make sure that the browser has a turn of the event loop between compiling the asm.js module and starting to run the application. This can be achieved by running **emcc** with ``--separate-asm``. + +You can also do this manually, as follows: * Run ``tools/separate_asm.py``. This receives as inputs the filename of the full project, and two filenames to emit: the asm.js file and a file for everything else. 
* Load the asm.js script first, then after a turn of the event loop, the other one, for example using code like this in your HTML file: diff --git a/site/source/docs/tools_reference/emcc.rst b/site/source/docs/tools_reference/emcc.rst index dc5370b17054f..f0c12d6765e2d 100644 --- a/site/source/docs/tools_reference/emcc.rst +++ b/site/source/docs/tools_reference/emcc.rst @@ -438,6 +438,9 @@ Options that are modified or new in *emcc* are listed below: ``-c`` Tells *emcc* to generate LLVM bitcode (which can then be linked with other bitcode files), instead of compiling all the way to JavaScript. +``--separate-asm`` + Emits asm.js in one file, and the rest of the code in another, and emits HTML that loads the asm.js first, in order to reduce memory load during startup. See :ref:`optimizing-code-separating_asm`. + .. _emcc-environment-variables: diff --git a/tests/test_browser.py b/tests/test_browser.py index f2d9b95f6727b..e80d72b2f7fad 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -2853,6 +2853,13 @@ def test_separate_asm(self): ''') self.run_browser('two.html', None, '/report_result?0') + self.clear() + assert not os.path.exists('test.asm.js') + self.btest('browser_test_hello_world.c', expected='0', args=['-O' + str(opts), '--separate-asm']) + assert os.path.exists('test.asm.js') + os.unlink('test.asm.js') + self.run_browser('test.html', None, '[no http server activity]', timeout=5) # fail without the asm + def test_emterpretify_file(self): open('shell.html', 'w').write('''