Skip to content

Commit 4e3c1b3

Browse files
committed
Optimize GL emulation prepareClientAttributes. Fix issues where the slow path was not able to handle unaligned source data. Annotate some unsupported cases. Remove liveClientAttributes, which does not seem to be a win in profiles.
1 parent dfd9cf8 commit 4e3c1b3

File tree

1 file changed

+98
-76
lines changed

1 file changed

+98
-76
lines changed

src/library_gl.js

+98-76
Original file line numberDiff line numberDiff line change
@@ -3367,7 +3367,6 @@ var LibraryGL = {
33673367
totalEnabledClientAttributes: 0,
33683368
enabledClientAttributes: [0, 0],
33693369
clientAttributes: [], // raw data, including possible unneeded ones
3370-
liveClientAttributes: [], // the ones actually alive in the current computation, sorted
33713370
currentRenderer: null, // Caches the currently active FFP emulation renderer, so that it does not have to be re-looked up unless relevant state changes.
33723371
modifiedClientAttributes: false,
33733372
clientActiveTexture: 0,
@@ -3430,17 +3429,17 @@ var LibraryGL = {
34303429
if (GLImmediate.currentRenderer) {
34313430
return GLImmediate.currentRenderer;
34323431
}
3433-
// return a renderer object given the liveClientAttributes
34343432
// we maintain a cache of renderers, optimized to not generate garbage
3435-
var attributes = GLImmediate.liveClientAttributes;
34363433
var cacheMap = GLImmediate.rendererCache;
34373434
var temp;
34383435
var keyView = cacheMap.getStaticKeyView().reset();
34393436

34403437
// By attrib state:
34413438
var enabledAttributesKey = 0;
3442-
for (var i = 0; i < attributes.length; i++) {
3443-
enabledAttributesKey |= 1 << attributes[i].name;
3439+
for (var i = 0; i < GLImmediate.MAX_TEXTURES+3; i++) {
3440+
if (GLImmediate.enabledClientAttributes[i]) {
3441+
enabledAttributesKey |= 1 << i;
3442+
}
34443443
}
34453444
keyView.next(enabledAttributesKey);
34463445

@@ -3471,7 +3470,13 @@ var LibraryGL = {
34713470
var renderer = keyView.get();
34723471
if (!renderer) {
34733472
#if GL_DEBUG
3474-
Module.printErr('generating renderer for ' + JSON.stringify(attributes));
3473+
var liveClientAttributes = [];
3474+
for (var i = 0; i < GLImmediate.MAX_TEXTURES+3; i++) {
3475+
if (GLImmediate.enabledClientAttributes[i]) {
3476+
liveClientAttributes.push(clientAttributes[i]);
3477+
}
3478+
}
3479+
Module.printErr('generating renderer for ' + JSON.stringify(liveClientAttributes));
34753480
#endif
34763481
renderer = GLImmediate.createRenderer();
34773482
GLImmediate.currentRenderer = renderer;
@@ -4077,96 +4082,113 @@ var LibraryGL = {
40774082
// does not work for glBegin/End, where we generate renderer components dynamically and then
40784083
// disable them ourselves, but it does help with glDrawElements/Arrays.
40794084
if (!GLImmediate.modifiedClientAttributes) {
4085+
#if GL_ASSERTIONS
4086+
if ((GLImmediate.stride & 3) != 0) {
4087+
Runtime.warnOnce('Warning: Rendering from client side vertex arrays where stride (' + GLImmediate.stride + ') is not a multiple of four! This is not currently supported!');
4088+
}
4089+
#endif
40804090
GLImmediate.vertexCounter = (GLImmediate.stride * count) / 4; // XXX assuming float
40814091
return;
40824092
}
40834093
GLImmediate.modifiedClientAttributes = false;
40844094

4085-
var stride = 0, start;
4086-
var attributes = GLImmediate.liveClientAttributes;
4087-
attributes.length = 0;
4088-
for (var i = 0; i < GLImmediate.NUM_ATTRIBUTES; i++) {
4089-
if (GLImmediate.enabledClientAttributes[i]) attributes.push(GLImmediate.clientAttributes[i]);
4090-
}
4091-
attributes.sort(function(x, y) { return !x ? (!y ? 0 : 1) : (!y ? -1 : (x.pointer - y.pointer)) });
4092-
start = GL.currArrayBuffer ? 0 : attributes[0].pointer;
4093-
var multiStrides = false;
4094-
for (var i = 0; i < attributes.length; i++) {
4095-
var attribute = attributes[i];
4096-
if (!attribute) break;
4097-
if (stride != 0 && stride != attribute.stride) multiStrides = true;
4098-
if (attribute.stride) stride = attribute.stride;
4095+
// The role of prepareClientAttributes is to examine the set of client-side vertex attribute buffers
4096+
// that user code has submitted, and to prepare them to be uploaded to a VBO in GPU memory
4097+
// (since WebGL does not support client-side rendering, i.e. rendering from vertex data in CPU memory)
4098+
// User code can generally submit vertex data in one of three different configurations:
4099+
// 1. Fully planar: all attributes are in their own separate tightly-packed arrays in CPU memory.
4100+
// 2. Fully interleaved: all attributes share a single array where data is interleaved something like (pos,uv,normal), (pos,uv,normal), ...
4101+
// 3. Complex hybrid: Multiple separate arrays that either are sparsely strided, and/or partially interleave vertex attributes.
4102+
4103+
// For simplicity, we support case (2) as the fast path. For (1) and (3), we do a memory copy of the
4104+
// vertex data here to prepare a re-laid-out buffer that has the structure of case (2). The reason
4105+
// for this is that it allows the emulation code to get away with using just one VBO buffer for rendering,
4106+
// and not have to maintain multiple ones. Therefore cases (1) and (3) will be very slow, and case (2) is fast.
4107+
4108+
// Detect which case we are in with a quick heuristic that examines the strides of the buffers. If all the buffers have identical
4109+
// stride, and that stride is at least the combined byte size of all enabled attributes, we assume we have case (2); otherwise we have something more complex.
4110+
var clientStartPointer = 0x7FFFFFFF;
4111+
var bytes = 0; // Total number of bytes taken up by a single vertex.
4112+
var minStride = 0x7FFFFFFF;
4113+
var maxStride = 0;
4114+
for (var i = 0; i < 3+GLImmediate.MAX_TEXTURES; i++) {
4115+
if (GLImmediate.enabledClientAttributes[i]) {
4116+
var attr = GLImmediate.clientAttributes[i];
4117+
clientStartPointer = Math.min(clientStartPointer, attr.pointer);
4118+
attr.sizeBytes = attr.size * GL.byteSizeByType[attr.type - GL.byteSizeByTypeRoot];
4119+
bytes += attr.sizeBytes;
4120+
minStride = Math.min(minStride, attr.stride);
4121+
maxStride = Math.max(maxStride, attr.stride);
4122+
}
40994123
}
41004124

4101-
if (multiStrides) stride = 0; // we will need to restride
4102-
var bytes = 0; // total size in bytes
4103-
if (!stride && !beginEnd) {
4104-
// beginEnd can not have stride in the attributes, that is fine. otherwise,
4105-
// no stride means that all attributes are in fact packed. to keep the rest of
4106-
// our emulation code simple, we perform unpacking/restriding here. this adds overhead, so
4107-
// it is a good idea to not hit this!
4108-
#if ASSERTIONS
4109-
Runtime.warnOnce('Unpacking/restriding attributes, this is slow and dangerous');
4125+
if ((minStride != maxStride || maxStride < bytes) && !beginEnd) {
4126+
// We are in cases (1) or (3): slow path, shuffle the data around into a single interleaved vertex buffer.
4127+
// The immediate-mode glBegin()/glEnd() vertex submission gets automatically generated in appropriate layout,
4128+
// so we never need to come down this path if that was used.
4129+
#if GL_ASSERTIONS
4130+
Runtime.warnOnce('Rendering from planar client-side vertex arrays. This is a very slow emulation path! Use interleaved vertex arrays for best performance.');
41104131
#endif
41114132
if (!GLImmediate.restrideBuffer) GLImmediate.restrideBuffer = _malloc(GL.MAX_TEMP_BUFFER_SIZE);
4112-
start = GLImmediate.restrideBuffer;
4113-
#if ASSERTIONS
4114-
assert(start % 4 == 0);
4115-
#endif
4133+
var start = GLImmediate.restrideBuffer;
4134+
bytes = 0;
41164135
// calculate restrided offsets and total size
4117-
for (var i = 0; i < attributes.length; i++) {
4118-
var attribute = attributes[i];
4119-
if (!attribute) break;
4120-
var size = attribute.size * GL.byteSizeByType[attribute.type - GL.byteSizeByTypeRoot];
4121-
if (size % 4 != 0) size += 4 - (size % 4); // align everything
4122-
attribute.offset = bytes;
4123-
bytes += size;
4136+
for (var i = 0; i < GLImmediate.clientAttributes.length; i++) {
4137+
if (GLImmediate.enabledClientAttributes[i]) {
4138+
var attribute = GLImmediate.clientAttributes[i];
4139+
var size = attribute.sizeBytes;
4140+
if (size % 4 != 0) size += 4 - (size % 4); // align everything
4141+
attribute.offset = bytes;
4142+
bytes += size;
4143+
}
41244144
}
4125-
#if ASSERTIONS
4126-
assert(count*bytes <= GL.MAX_TEMP_BUFFER_SIZE);
4127-
#endif
4128-
// copy out the data (we need to know the stride for that, and define attribute.pointer
4129-
for (var i = 0; i < attributes.length; i++) {
4130-
var attribute = attributes[i];
4131-
if (!attribute) break;
4132-
var size4 = Math.floor((attribute.size * GL.byteSizeByType[attribute.type - GL.byteSizeByTypeRoot])/4);
4133-
for (var j = 0; j < count; j++) {
4134-
for (var k = 0; k < size4; k++) { // copy in chunks of 4 bytes, our alignment makes this possible
4135-
HEAP32[((start + attribute.offset + bytes*j)>>2) + k] = HEAP32[(attribute.pointer>>2) + j*size4 + k];
4145+
// copy out the data (we need to know the stride for that, and define attribute.pointer)
4146+
for (var i = 0; i < GLImmediate.clientAttributes.length; i++) {
4147+
if (GLImmediate.enabledClientAttributes[i]) {
4148+
var attribute = GLImmediate.clientAttributes[i];
4149+
var srcStride = Math.max(attribute.sizeBytes, attribute.stride);
4150+
if ((srcStride & 3) == 0 && (attribute.sizeBytes & 3) == 0) {
4151+
var size4 = attribute.sizeBytes>>2;
4152+
var srcStride4 = Math.max(attribute.sizeBytes, attribute.stride)>>2;
4153+
for (var j = 0; j < count; j++) {
4154+
for (var k = 0; k < size4; k++) { // copy in chunks of 4 bytes, our alignment makes this possible
4155+
HEAP32[((start + attribute.offset + bytes*j)>>2) + k] = HEAP32[(attribute.pointer>>2) + j*srcStride4 + k];
4156+
}
4157+
}
4158+
} else {
4159+
for (var j = 0; j < count; j++) {
4160+
for (var k = 0; k < attribute.sizeBytes; k++) { // source data was not aligned to multiples of 4, must copy byte by byte.
4161+
HEAP8[start + attribute.offset + bytes*j + k] = HEAP8[attribute.pointer + j*srcStride + k];
4162+
}
4163+
}
41364164
}
4165+
attribute.pointer = start + attribute.offset;
41374166
}
4138-
attribute.pointer = start + attribute.offset;
41394167
}
4168+
GLImmediate.stride = bytes;
4169+
GLImmediate.vertexPointer = start;
41404170
} else {
4141-
// normal situation, everything is strided and in the same buffer
4142-
for (var i = 0; i < attributes.length; i++) {
4143-
var attribute = attributes[i];
4144-
if (!attribute) break;
4145-
attribute.offset = attribute.pointer - start;
4146-
if (attribute.offset > bytes) { // ensure we start where we should
4147-
#if ASSERTIONS
4148-
assert((attribute.offset - bytes)%4 == 0); // XXX assuming 4-alignment
4149-
#endif
4150-
bytes += attribute.offset - bytes;
4151-
}
4152-
bytes += attribute.size * GL.byteSizeByType[attribute.type - GL.byteSizeByTypeRoot];
4153-
if (bytes % 4 != 0) bytes += 4 - (bytes % 4); // XXX assuming 4-alignment
4171+
// case (2): fast path, all data is interleaved to a single vertex array so we can get away with a single VBO upload.
4172+
if (GL.currArrayBuffer) {
4173+
GLImmediate.vertexPointer = 0;
4174+
} else {
4175+
GLImmediate.vertexPointer = clientStartPointer;
41544176
}
4155-
#if ASSERTIONS
4156-
assert(beginEnd || bytes <= stride); // if not begin-end, explicit stride should make sense with total byte size
4157-
#endif
4158-
if (bytes < stride) { // ensure the size is that of the stride
4159-
bytes = stride;
4177+
for (var i = 0; i < 3+GLImmediate.MAX_TEXTURES; i++) {
4178+
if (GLImmediate.enabledClientAttributes[i]) {
4179+
var attr = GLImmediate.clientAttributes[i];
4180+
attr.offset = attr.pointer - clientStartPointer; // Compute what will be the offset of this attribute in the VBO after we upload.
4181+
}
41604182
}
4183+
GLImmediate.stride = Math.max(maxStride, bytes);
41614184
}
4162-
GLImmediate.stride = bytes;
4163-
41644185
if (!beginEnd) {
4165-
bytes *= count;
4166-
if (!GL.currArrayBuffer) {
4167-
GLImmediate.vertexPointer = start;
4186+
#if GL_ASSERTIONS
4187+
if ((GLImmediate.stride & 3) != 0) {
4188+
Runtime.warnOnce('Warning: Rendering from client side vertex arrays where stride (' + GLImmediate.stride + ') is not a multiple of four! This is not currently supported!');
41684189
}
4169-
GLImmediate.vertexCounter = bytes / 4; // XXX assuming float
4190+
#endif
4191+
GLImmediate.vertexCounter = (GLImmediate.stride * count) / 4; // XXX assuming float
41704192
}
41714193
},
41724194

0 commit comments

Comments
 (0)