From fa341ef1597dd7284b98a246d0207f7d9bbeb252 Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Mon, 15 Dec 2014 15:29:49 -0800
Subject: [PATCH 01/15] SIMD shift and select operators

---
 lib/Target/JSBackend/JSBackend.cpp |  79 +++++++++++++++-
 test/CodeGen/JS/simd-select.ll     |  30 ++++++
 test/CodeGen/JS/simd-shift.ll      | 142 +++++++++++++++++++++++++++++
 3 files changed, 249 insertions(+), 2 deletions(-)
 create mode 100644 test/CodeGen/JS/simd-select.ll
 create mode 100644 test/CodeGen/JS/simd-shift.ll
diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index 4b8da31df04..28060d44617 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -455,6 +455,7 @@ namespace {
     void generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw_string_ostream& Code);
     void generateICmpExpression(const ICmpInst *I, raw_string_ostream& Code);
     void generateFCmpExpression(const FCmpInst *I, raw_string_ostream& Code);
+    void generateShiftExpression(const BinaryOperator *I, raw_string_ostream& Code);
     void generateUnrolledExpression(const User *I, raw_string_ostream& Code);
     bool generateSIMDExpression(const User *I, raw_string_ostream& Code);
     void generateExpression(const User *I, raw_string_ostream& Code);
@@ -1449,9 +1450,61 @@ void JSWriter::generateFCmpExpression(const FCmpInst *I, raw_string_ostream& Cod
     Code << ")";
 }
 
+// Returns the scalar inserted into lane i along V's insertelement chain, or NULL if unknown.
+static const Value *getElement(const Value *V, unsigned i) {
+    while (const InsertElementInst *II = dyn_cast<InsertElementInst>(V)) {
+        const ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(2));
+        if (!CI) return NULL; // a variable index may have overwritten lane i
+        if (CI->equalsInt(i)) return II->getOperand(1);
+        V = II->getOperand(0);
+    }
+    return NULL;
+}
+
+// Returns the single value occupying every lane of vector V, or NULL when V
+// cannot be shown to be a splat.
+static const Value *getSplatValue(const Value *V) {
+    // Constants know how to answer this themselves.
+    if (const Constant *C = dyn_cast<Constant>(V))
+        return C->getSplatValue();
+
+    // Otherwise each lane must resolve to one and the same inserted scalar.
+    const unsigned NumLanes = cast<VectorType>(V->getType())->getNumElements();
+    const Value *Common = NULL;
+    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+        const Value *Elt = getElement(V, Lane);
+        if (!Elt || (Common && Common != Elt))
+            return NULL;
+        Common = Elt;
+    }
+    return Common;
+}
+
+void JSWriter::generateShiftExpression(const BinaryOperator *I, raw_string_ostream& Code) {
+    const Value *Splat = getSplatValue(I->getOperand(1));
+    if (!Splat) {
+        // SIMD.js does not currently have vector-vector shifts, so go lane by lane.
+        generateUnrolledExpression(I, Code);
+        return;
+    }
+
+    // Every lane is shifted by the same amount (the count is a splat value),
+    // so a single ByScalar shift does the job.
+    const char *Name = "shiftLeftByScalar";
+    switch (I->getOpcode()) {
+        case Instruction::AShr: Name = "shiftRightArithmeticByScalar"; break;
+        case Instruction::LShr: Name = "shiftRightLogicalByScalar"; break;
+        default: break;
+    }
+    Code << getAssignIfNeeded(I) << "SIMD_int32x4_" << Name;
+    Code << "(" << getValueAsStr(I->getOperand(0)) << ", " << getValueAsStr(Splat) << ")";
+}
+
 void JSWriter::generateUnrolledExpression(const User *I, raw_string_ostream& Code) {
   VectorType *VT = cast<VectorType>(I->getType());
 
+  Code << getAssignIfNeeded(I);
+
   if (VT->getElementType()->isIntegerTy()) {
     Code << "SIMD_int32x4(";
   } else {
@@ -1481,6 +1534,18 @@ void JSWriter::generateUnrolledExpression(const User *I, raw_string_ostream& Cod
         Code << "(" << getValueAsStr(I->getOperand(0)) << Lane << ">>>0) / ("
              << getValueAsStr(I->getOperand(1)) << Lane << ">>>0)>>>0";
         break;
+      case Instruction::AShr:
+        Code << "(" << getValueAsStr(I->getOperand(0)) << Lane << "|0) >> ("
+             << getValueAsStr(I->getOperand(1)) << Lane << "|0)|0";
+        break;
+      case Instruction::LShr:
+        Code << "(" << getValueAsStr(I->getOperand(0)) << Lane << "|0) >>> ("
+             << getValueAsStr(I->getOperand(1)) << Lane << "|0)|0";
+        break;
+      case Instruction::Shl:
+        Code << "(" << getValueAsStr(I->getOperand(0)) << Lane << "|0) << ("
+             << getValueAsStr(I->getOperand(1)) << Lane << "|0)|0";
+        break;
       default: I->dump(); error("invalid unrolled vector instr"); break;
     }
   }
@@ -1514,8 +1579,13 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
         Code << getAssignIfNeeded(I) << getValueAsStr(I->getOperand(0));
         break;
       case Instruction::Select:
-        assert(I->getOperand(0)->getType()->isIntegerTy(1) && "vector-of-i1 select not yet supported");
-        // select arms are SIMD values, no special handling
+        // Since we represent vectors of i1 as vectors of sign extended wider integers,
+        // selecting on them is just an elementwise select.
+        if (cast<VectorType>(I->getType())->getElementType()->isIntegerTy()) {
+          Code << getAssignIfNeeded(I) << "SIMD_int32x4_select(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << "," << getValueAsStr(I->getOperand(2)) << ")"; break;
+        } else {
+          Code << getAssignIfNeeded(I) << "SIMD_float32x4_select(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << "," << getValueAsStr(I->getOperand(2)) << ")"; break;
+        }
         return false;
       case Instruction::FAdd: Code << getAssignIfNeeded(I) << "SIMD_float32x4_add(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break;
       case Instruction::FMul: Code << getAssignIfNeeded(I) << "SIMD_float32x4_mul(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break;
@@ -1580,6 +1650,11 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
         // doesn't support them).
         generateUnrolledExpression(I, Code);
         break;
+      case Instruction::AShr:
+      case Instruction::LShr:
+      case Instruction::Shl:
+        generateShiftExpression(cast<BinaryOperator>(I), Code);
+        break;
     }
     return true;
   } else {
diff --git a/test/CodeGen/JS/simd-select.ll b/test/CodeGen/JS/simd-select.ll
new file mode 100644
index 00000000000..d28347feac9
--- /dev/null
+++ b/test/CodeGen/JS/simd-select.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+
+; CHECK: function _test0($a,$b,$cond) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  $cond = SIMD_int32x4($cond);
+; CHECK:  $cmp = SIMD_int32x4_select($cond,$a,$b);
+; CHECK:  return (SIMD_int32x4($cmp));
+; CHECK: }
+define <4 x i32> @test0(<4 x i32> %a, <4 x i32> %b, <4 x i1> %cond) nounwind {
+entry:
+  %cmp = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %cmp
+}
+
+; CHECK: function _test1($a,$b,$cond) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  $cond = SIMD_int32x4($cond);
+; CHECK:  $cmp = SIMD_float32x4_select($cond,$a,$b);
+; CHECK:  return (SIMD_float32x4($cmp));
+; CHECK: }
+define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %cond) nounwind {
+entry:
+  %cmp = select <4 x i1> %cond, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %cmp
+}
diff --git a/test/CodeGen/JS/simd-shift.ll b/test/CodeGen/JS/simd-shift.ll
new file mode 100644
index 00000000000..09819876fd8
--- /dev/null
+++ b/test/CodeGen/JS/simd-shift.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+
+; CHECK: function _test0($a) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $shl = SIMD_int32x4_shiftLeftByScalar($a, 3);
+; CHECK:  return (SIMD_int32x4($shl));
+; CHECK: }
+define <4 x i32> @test0(<4 x i32> %a) {
+entry:
+  %shl = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %shl
+}
+
+; CHECK: function _test1($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = $b|0;
+; CHECK:  $shl = SIMD_int32x4_shiftLeftByScalar($a, $b);
+; CHECK:  return (SIMD_int32x4($shl));
+; CHECK: }
+define <4 x i32> @test1(<4 x i32> %a, i32 %b) {
+entry:
+  %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3
+  %shl = shl <4 x i32> %a, %vecinit3
+  ret <4 x i32> %shl
+}
+
+; CHECK: function _test2($a,$b,$c) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = $b|0;
+; CHECK:  $c = $c|0;
+; CHECK:  var $shl = SIMD_int32x4(0,0,0,0), $vecinit3 = SIMD_int32x4(0,0,0,0), label = 0, sp = 0;
+; CHECK:  $vecinit3 = SIMD_int32x4($b, $b, $c, $b);
+; CHECK:  $shl = SIMD_int32x4(($a.x|0) << ($vecinit3.x|0)|0, ($a.y|0) << ($vecinit3.y|0)|0, ($a.z|0) << ($vecinit3.z|0)|0, ($a.w|0) << ($vecinit3.w|0)|0);
+; CHECK:  return (SIMD_int32x4($shl));
+; CHECK: }
+define <4 x i32> @test2(<4 x i32> %a, i32 %b, i32 %c) {
+entry:
+  %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3
+  %shl = shl <4 x i32> %a, %vecinit3
+  ret <4 x i32> %shl
+}
+
+; CHECK: function _test3($a) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $shr = SIMD_int32x4_shiftRightArithmeticByScalar($a, 3);
+; CHECK:  return (SIMD_int32x4($shr));
+; CHECK: }
+define <4 x i32> @test3(<4 x i32> %a) {
+entry:
+  %shr = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %shr
+}
+
+; CHECK: function _test4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = $b|0;
+; CHECK:  $shr = SIMD_int32x4_shiftRightArithmeticByScalar($a, $b);
+; CHECK:  return (SIMD_int32x4($shr));
+; CHECK: }
+define <4 x i32> @test4(<4 x i32> %a, i32 %b) {
+entry:
+  %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3
+  %shr = ashr <4 x i32> %a, %vecinit3
+  ret <4 x i32> %shr
+}
+
+; CHECK: function _test5($a,$b,$c) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = $b|0;
+; CHECK:  $c = $c|0;
+; CHECK:  var $shr = SIMD_int32x4(0,0,0,0), $vecinit3 = SIMD_int32x4(0,0,0,0), label = 0, sp = 0;
+; CHECK:  $vecinit3 = SIMD_int32x4($b, $c, $b, $b);
+; CHECK:  $shr = SIMD_int32x4(($a.x|0) >> ($vecinit3.x|0)|0, ($a.y|0) >> ($vecinit3.y|0)|0, ($a.z|0) >> ($vecinit3.z|0)|0, ($a.w|0) >> ($vecinit3.w|0)|0);
+; CHECK:  return (SIMD_int32x4($shr));
+; CHECK: }
+define <4 x i32> @test5(<4 x i32> %a, i32 %b, i32 %c) {
+entry:
+  %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %c, i32 1
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3
+  %shr = ashr <4 x i32> %a, %vecinit3
+  ret <4 x i32> %shr
+}
+
+; CHECK: function _test6($a) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $lshr = SIMD_int32x4_shiftRightLogicalByScalar($a, 3);
+; CHECK:  return (SIMD_int32x4($lshr));
+; CHECK: }
+define <4 x i32> @test6(<4 x i32> %a) {
+entry:
+  %lshr = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %lshr
+}
+
+; CHECK: function _test7($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = $b|0;
+; CHECK:  $lshr = SIMD_int32x4_shiftRightLogicalByScalar($a, $b);
+; CHECK:  return (SIMD_int32x4($lshr));
+; CHECK: }
+define <4 x i32> @test7(<4 x i32> %a, i32 %b) {
+entry:
+  %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3
+  %lshr = lshr <4 x i32> %a, %vecinit3
+  ret <4 x i32> %lshr
+}
+
+; CHECK: function _test8($a,$b,$c) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = $b|0;
+; CHECK:  $c = $c|0;
+; CHECK:  var $lshr = SIMD_int32x4(0,0,0,0), $vecinit3 = SIMD_int32x4(0,0,0,0), label = 0, sp = 0;
+; CHECK:  $vecinit3 = SIMD_int32x4($b, $b, $b, $c);
+; CHECK:  $lshr = SIMD_int32x4(($a.x|0) >>> ($vecinit3.x|0)|0, ($a.y|0) >>> ($vecinit3.y|0)|0, ($a.z|0) >>> ($vecinit3.z|0)|0, ($a.w|0) >>> ($vecinit3.w|0)|0);
+; CHECK:  return (SIMD_int32x4($lshr));
+; CHECK: }
+define <4 x i32> @test8(<4 x i32> %a, i32 %b, i32 %c) {
+entry:
+  %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %c, i32 3
+  %lshr = lshr <4 x i32> %a, %vecinit3
+  ret <4 x i32> %lshr
+}

From fe65a7bc42894c454ab299a02d3e343c0c1dc76f Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Wed, 17 Dec 2014 21:05:02 -0800
Subject: [PATCH 02/15] Implement bitcast from SIMD to i128.

---
 lib/Target/JSBackend/ExpandI64.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/lib/Target/JSBackend/ExpandI64.cpp b/lib/Target/JSBackend/ExpandI64.cpp
index 168e66063b3..06c7662a84a 100644
--- a/lib/Target/JSBackend/ExpandI64.cpp
+++ b/lib/Target/JSBackend/ExpandI64.cpp
@@ -815,6 +815,16 @@ bool ExpandI64::splitInst(Instruction *I) {
         Chunks.push_back(L);
         Chunks.push_back(H);
         break;
+      } else if (isa<VectorType>(I->getOperand(0)->getType()) && !isa<VectorType>(I->getType())) {
+          unsigned NumElts = getNumChunks(I->getType());
+          VectorType *IVTy = VectorType::get(i32, NumElts);
+          Instruction *B = CopyDebug(new BitCastInst(I->getOperand(0), IVTy), I);
+          for (unsigned i = 0; i < NumElts; ++i) {
+              Constant *Idx = ConstantInt::get(i32, i);
+              Instruction *Ext = CopyDebug(ExtractElementInst::Create(B, Idx, "", I), I);
+              Chunks.push_back(Ext);
+          }
+          break;
       } else {
         // no-op bitcast
         assert(I->getType() == I->getOperand(0)->getType());

From 234168bb60ea2f98688ed4a9107db208915d70bf Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Wed, 17 Dec 2014 21:05:24 -0800
Subject: [PATCH 03/15] Expand vector insert/extract with dynamic index

This patch adds a pass which expands vector insert/extract element with
a non-constant index into memory operations.
---
 include/llvm/InitializePasses.h               |   1 +
 include/llvm/Transforms/NaCl.h                |   1 +
 lib/Target/JSBackend/CMakeLists.txt           |   1 +
 lib/Target/JSBackend/ExpandI64.cpp            |   2 +-
 .../JSBackend/ExpandInsertExtractElement.cpp  | 106 ++++++++++++++++++
 lib/Target/JSBackend/JSBackend.cpp            |   1 +
 lib/Target/JSBackend/OptPasses.h              |   1 +
 test/CodeGen/JS/expand-insertextract.ll       |  31 +++++
 8 files changed, 143 insertions(+), 1 deletion(-)
 create mode 100644 lib/Target/JSBackend/ExpandInsertExtractElement.cpp
 create mode 100644 test/CodeGen/JS/expand-insertextract.ll

diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 30325f66bf2..19f18df659c 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -298,6 +298,7 @@ void initializeRewritePNaClLibraryCallsPass(PassRegistry&);
 void initializeStripAttributesPass(PassRegistry&);
 void initializeStripMetadataPass(PassRegistry&);
 void initializeExpandI64Pass(PassRegistry&); // XXX EMSCRIPTEN
+void initializeExpandInsertExtractElementPass(PassRegistry&); // XXX EMSCRIPTEN
 void initializeLowerEmExceptionsPass(PassRegistry&); // XXX EMSCRIPTEN
 void initializeLowerEmSetjmpPass(PassRegistry&); // XXX EMSCRIPTEN
 void initializeLowerEmAsyncifyPass(PassRegistry&); // XXX EMSCRIPTEN
diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h
index d3b4e8bc64d..22b95cae18c 100644
--- a/include/llvm/Transforms/NaCl.h
+++ b/include/llvm/Transforms/NaCl.h
@@ -53,6 +53,7 @@ ModulePass *createStripAttributesPass();
 ModulePass *createStripMetadataPass();
 
 ModulePass *createExpandI64Pass(); // XXX EMSCRIPTEN
+ModulePass *createExpandInsertExtractElementPass(); // XXX EMSCRIPTEN
 ModulePass *createLowerEmExceptionsPass(); // XXX EMSCRIPTEN
 ModulePass *createLowerEmSetjmpPass(); // XXX EMSCRIPTEN
 ModulePass *createNoExitRuntimePass(); // XXX EMSCRIPTEN
diff --git a/lib/Target/JSBackend/CMakeLists.txt b/lib/Target/JSBackend/CMakeLists.txt
index ac04b0a2a71..c1194467f4b 100644
--- a/lib/Target/JSBackend/CMakeLists.txt
+++ b/lib/Target/JSBackend/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_llvm_target(JSBackendCodeGen
   AllocaManager.cpp
   ExpandI64.cpp
+  ExpandInsertExtractElement.cpp
   JSBackend.cpp
   JSTargetMachine.cpp
   JSTargetTransformInfo.cpp
diff --git a/lib/Target/JSBackend/ExpandI64.cpp b/lib/Target/JSBackend/ExpandI64.cpp
index 06c7662a84a..49296107c9e 100644
--- a/lib/Target/JSBackend/ExpandI64.cpp
+++ b/lib/Target/JSBackend/ExpandI64.cpp
@@ -818,7 +818,7 @@ bool ExpandI64::splitInst(Instruction *I) {
       } else if (isa<VectorType>(I->getOperand(0)->getType()) && !isa<VectorType>(I->getType())) {
           unsigned NumElts = getNumChunks(I->getType());
           VectorType *IVTy = VectorType::get(i32, NumElts);
-          Instruction *B = CopyDebug(new BitCastInst(I->getOperand(0), IVTy), I);
+          Instruction *B = CopyDebug(new BitCastInst(I->getOperand(0), IVTy, "", I), I);
           for (unsigned i = 0; i < NumElts; ++i) {
               Constant *Idx = ConstantInt::get(i32, i);
               Instruction *Ext = CopyDebug(ExtractElementInst::Create(B, Idx, "", I), I);
diff --git a/lib/Target/JSBackend/ExpandInsertExtractElement.cpp b/lib/Target/JSBackend/ExpandInsertExtractElement.cpp
new file mode 100644
index 00000000000..fbc7b8667aa
--- /dev/null
+++ b/lib/Target/JSBackend/ExpandInsertExtractElement.cpp
@@ -0,0 +1,106 @@
+//==- ExpandInsertExtractElement.cpp - Expand vector insert and extract -=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===------------------------------------------------------------------===//
+//
+// This pass expands insertelement and extractelement instructions with
+// variable indices, which SIMD.js doesn't natively support yet.
+//
+//===------------------------------------------------------------------===//
+
+#include "OptPasses.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <map>
+#include <vector>
+
+#include "llvm/Support/raw_ostream.h"
+
+#ifdef NDEBUG
+#undef assert
+#define assert(x) { if (!(x)) report_fatal_error(#x); }
+#endif
+
+using namespace llvm;
+
+namespace {
+  // Function pass expanding insertelement/extractelement with variable indices.
+  class ExpandInsertExtractElement : public FunctionPass {
+    bool Changed; // reset at the start of runOnFunction and returned as its result
+
+  public:
+    static char ID; // passed to the FunctionPass base-class constructor below
+    ExpandInsertExtractElement() : FunctionPass(ID) {
+      initializeExpandInsertExtractElementPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnFunction(Function &F);
+  };
+}
+
+char ExpandInsertExtractElement::ID = 0;
+INITIALIZE_PASS(ExpandInsertExtractElement, "expand-insert-extract-elements",
+                "Expand and lower insert and extract element operations",
+                false, false)
+
+// Utilities
+
+static Instruction *CopyDebug(Instruction *NewInst, Instruction *Original) {
+  NewInst->setDebugLoc(Original->getDebugLoc()); // give NewInst the debug location of Original
+  return NewInst; // hand NewInst back so the call can wrap an instruction being created
+}
+
+// Expands every insertelement/extractelement whose index is not a constant into
+// a spill to a stack slot plus an indexed scalar store/load. Returns true if F changed.
+bool ExpandInsertExtractElement::runOnFunction(Function &F) {
+  Changed = false;
+
+  Type *Int32 = Type::getInt32Ty(F.getContext());
+  Constant *Zero = ConstantInt::get(Int32, 0);
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    Instruction *Inst = &*I++; // step past Inst first: it may be erased below
+    if (InsertElementInst *III = dyn_cast<InsertElementInst>(Inst)) {
+      if (isa<ConstantInt>(III->getOperand(2)))
+          continue;
+      // Re-read the entry head each time: a cached head may itself have been erased.
+      Instruction *Entry = F.getEntryBlock().begin();
+      Instruction *A = new AllocaInst(III->getType(), 0, "", Entry);
+      CopyDebug(new StoreInst(III->getOperand(0), A, III), III);
+      Value *Idxs[] = { Zero, III->getOperand(2) };
+      Instruction *B = CopyDebug(GetElementPtrInst::Create(A, Idxs, "", III), III);
+      CopyDebug(new StoreInst(III->getOperand(1), B, III), III);
+      Instruction *L = CopyDebug(new LoadInst(A, "", III), III);
+      III->replaceAllUsesWith(L);
+      III->eraseFromParent();
+      Changed = true;
+    } else if (ExtractElementInst *EII = dyn_cast<ExtractElementInst>(Inst)) {
+      if (isa<ConstantInt>(EII->getOperand(1)))
+          continue;
+      // Same spill as above, but only the selected lane is loaded back.
+      Instruction *Entry = F.getEntryBlock().begin();
+      Instruction *A = new AllocaInst(EII->getOperand(0)->getType(), 0, "", Entry);
+      CopyDebug(new StoreInst(EII->getOperand(0), A, EII), EII);
+      Value *Idxs[] = { Zero, EII->getOperand(1) };
+      Instruction *B = CopyDebug(GetElementPtrInst::Create(A, Idxs, "", EII), EII);
+      Instruction *L = CopyDebug(new LoadInst(B, "", EII), EII);
+      EII->replaceAllUsesWith(L);
+      EII->eraseFromParent();
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
+Pass *llvm::createExpandInsertExtractElementPass() {
+  return new ExpandInsertExtractElement(); // caller receives a freshly allocated pass object
+}
diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index 28060d44617..05a2d31c3e8 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -2864,6 +2864,7 @@ bool JSTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                           AnalysisID StopAfter) {
   assert(FileType == TargetMachine::CGFT_AssemblyFile);
 
+  PM.add(createExpandInsertExtractElementPass());
   PM.add(createExpandI64Pass());
 
   CodeGenOpt::Level OptLevel = getOptLevel();
diff --git a/lib/Target/JSBackend/OptPasses.h b/lib/Target/JSBackend/OptPasses.h
index 2f90b568b01..5e236569f9e 100644
--- a/lib/Target/JSBackend/OptPasses.h
+++ b/lib/Target/JSBackend/OptPasses.h
@@ -17,6 +17,7 @@ namespace llvm {
   extern FunctionPass *createSimplifyAllocasPass();
 
   extern Pass *createExpandI64Pass();
+  extern Pass *createExpandInsertExtractElementPass();
 
 } // End llvm namespace
 
diff --git a/test/CodeGen/JS/expand-insertextract.ll b/test/CodeGen/JS/expand-insertextract.ll
new file mode 100644
index 00000000000..7a247380f32
--- /dev/null
+++ b/test/CodeGen/JS/expand-insertextract.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+
+; CHECK:  sp = STACKTOP;
+; CHECK:  STACKTOP = STACKTOP + 16|0;
+; CHECK:  $0 = sp;
+; CHECK:  SIMD_float32x4_store(HEAPU8, $0, $p);
+; CHECK:  $1 = (($0) + ($i<<2)|0);
+; CHECK:  $2 = +HEAPF32[$1>>2];
+; CHECK:  STACKTOP = sp;return (+$2);
+; CHECK: }
+define float @ext(<4 x float> %p, i32 %i) {
+  %f = extractelement <4 x float> %p, i32 %i
+  ret float %f
+}
+
+; CHECK:  sp = STACKTOP;
+; CHECK:  STACKTOP = STACKTOP + 16|0;
+; CHECK:  $0 = sp;
+; CHECK:  SIMD_float32x4_store(HEAPU8, $0, $p);
+; CHECK:  $1 = (($0) + ($i<<2)|0);
+; CHECK:  HEAPF32[$1>>2] = $f;
+; CHECK:  $2 = SIMD_float32x4_load(HEAPU8, $0);
+; CHECK:  STACKTOP = sp;return (SIMD_float32x4($2));
+; CHECK: }
+define <4 x float> @ins(<4 x float> %p, float %f, i32 %i) {
+  %v = insertelement <4 x float> %p, float %f, i32 %i
+  ret <4 x float> %v
+}

From 9e06c5a45f1d24e0270057ab86754542509571b6 Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Thu, 18 Dec 2014 10:28:34 -0800
Subject: [PATCH 04/15] Add a few more TargetTransformInfo heuristics

SIMD.js doesn't yet have vector-vector shifts or insert/extract by
non-constant index, so score those as expensive.
---
 .../JSBackend/JSTargetTransformInfo.cpp       | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/lib/Target/JSBackend/JSTargetTransformInfo.cpp b/lib/Target/JSBackend/JSTargetTransformInfo.cpp
index 74c2201d54e..dcb92b4d643 100644
--- a/lib/Target/JSBackend/JSTargetTransformInfo.cpp
+++ b/lib/Target/JSBackend/JSTargetTransformInfo.cpp
@@ -68,6 +68,15 @@ class JSTTI : public ImmutablePass, public TargetTransformInfo {
   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
 
   virtual unsigned getRegisterBitWidth(bool Vector) const;
+
+  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                          OperandValueKind Opd1Info = OK_AnyValue,
+                                          OperandValueKind Opd2Info = OK_AnyValue) const;
+
+  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+                                      unsigned Index = -1) const;
+
+  virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
 };
 
 } // end anonymous namespace
@@ -101,3 +110,56 @@ unsigned JSTTI::getRegisterBitWidth(bool Vector) const {
 
   return 32;
 }
+
+unsigned JSTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       OperandValueKind Opd1Info,
+                                       OperandValueKind Opd2Info) const {
+  // Cost returned for vector types we do not want the optimizer to create.
+  const unsigned Nope = 65536;
+
+  // Begin with the generic estimate; non-vector types keep it unchanged.
+  unsigned Cost = TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info);
+  VectorType *VTy = dyn_cast<VectorType>(Ty);
+  if (!VTy)
+    return Cost;
+
+  // Only four-lane vectors for now; wait until the other types are optimized.
+  const unsigned NumLanes = VTy->getNumElements();
+  if (NumLanes != 4)
+    return Nope;
+
+  // SIMD.js supports int32x4 and float32x4, and we can emulate <4 x i1>.
+  Type *EltTy = VTy->getElementType();
+  const bool LaneTypeOk =
+      EltTy->isIntegerTy(1) || EltTy->isIntegerTy(32) || EltTy->isFloatTy();
+  if (!LaneTypeOk)
+    return Nope;
+
+  // SIMD.js' shifts are currently only ByScalar, so a shift whose count is
+  // not uniform across lanes is priced as a per-lane expansion plus a penalty.
+  const bool IsShift = Opcode == Instruction::LShr ||
+                       Opcode == Instruction::AShr ||
+                       Opcode == Instruction::Shl;
+  const bool UniformCount = Opd2Info == OK_UniformValue ||
+                            Opd2Info == OK_UniformConstantValue;
+  if (IsShift && !UniformCount)
+    Cost = Cost * NumLanes + 100;
+
+  return Cost;
+}
+
+unsigned JSTTI::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const {
+  // Start from the generic estimate.
+  const unsigned BaseCost = TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+
+  // SIMD.js' insert/extract currently only take constant indices, so an
+  // Index of -1 is charged an extra 100 on top of the generic cost.
+  const unsigned Penalty = (Index == -1u) ? 100 : 0;
+  return BaseCost + Penalty;
+}
+
+void JSTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
+  // We generally don't want a lot of unrolling: clear both flags for every loop (L is unused).
+  UP.Partial = false;
+  UP.Runtime = false;
+}

From 4f1e8b811742d75bb114a8964e4a4f6b8659bad8 Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Thu, 18 Dec 2014 10:49:56 -0800
Subject: [PATCH 05/15] Support for non-power-of-2 SIMD types

LLVM's optimizer generates types like <3 x float> in some cases, so we
should support them. Also, this is a chance to make use of
loadXYZ/storeXYZ and friends.
---
 lib/Target/JSBackend/JSBackend.cpp | 83 +++++++++++++++++++-----------
 test/CodeGen/JS/simd-loadstore.ll  | 68 ++++++++++++++++++++++++
 2 files changed, 121 insertions(+), 30 deletions(-)
 create mode 100644 test/CodeGen/JS/simd-loadstore.ll

diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index 05a2d31c3e8..51610aa72ee 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -386,7 +386,8 @@ namespace {
       // of the compare that produced them.
       assert(VT->getElementType()->getPrimitiveSizeInBits() == 32 ||
              VT->getElementType()->getPrimitiveSizeInBits() == 1);
-      assert(VT->getNumElements() == 4);
+      assert(VT->getBitWidth() <= 128);
+      assert(VT->getNumElements() <= 4);
       UsesSIMD = true;
     }
 
@@ -1074,18 +1075,23 @@ std::string JSWriter::getConstant(const Constant* CV, AsmCast sign) {
       return "0";
     }
   } else if (const ConstantDataVector *DV = dyn_cast<ConstantDataVector>(CV)) {
-    return getConstantVector(cast<VectorType>(CV->getType())->getElementType(),
-                             getConstant(DV->getElementAsConstant(0)),
-                             getConstant(DV->getElementAsConstant(1)),
-                             getConstant(DV->getElementAsConstant(2)),
-                             getConstant(DV->getElementAsConstant(3)));
+    unsigned NumElts = cast<VectorType>(DV->getType())->getNumElements();
+    Type *EltTy = cast<VectorType>(DV->getType())->getElementType();
+    Constant *Undef = UndefValue::get(EltTy);
+    return getConstantVector(EltTy,
+                             getConstant(NumElts > 0 ? DV->getElementAsConstant(0) : Undef),
+                             getConstant(NumElts > 1 ? DV->getElementAsConstant(1) : Undef),
+                             getConstant(NumElts > 2 ? DV->getElementAsConstant(2) : Undef),
+                             getConstant(NumElts > 3 ? DV->getElementAsConstant(3) : Undef));
   } else if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) {
-    assert(V->getNumOperands() == 4);
+    unsigned NumElts = cast<VectorType>(CV->getType())->getNumElements();
+    Type *EltTy = cast<VectorType>(CV->getType())->getElementType();
+    Constant *Undef = UndefValue::get(EltTy);
     return getConstantVector(cast<VectorType>(V->getType())->getElementType(),
-                             getConstant(V->getOperand(0)),
-                             getConstant(V->getOperand(1)),
-                             getConstant(V->getOperand(2)),
-                             getConstant(V->getOperand(3)));
+                             getConstant(NumElts > 0 ? V->getOperand(0) : Undef),
+                             getConstant(NumElts > 1 ? V->getOperand(1) : Undef),
+                             getConstant(NumElts > 2 ? V->getOperand(2) : Undef),
+                             getConstant(NumElts > 3 ? V->getOperand(3) : Undef));
   } else if (const ConstantArray *CA = dyn_cast<const ConstantArray>(CV)) {
     // handle things like [i8* bitcast (<{ i32, i32, i32 }>* @_ZTISt9bad_alloc to i8*)] which clang can emit for landingpads
     assert(CA->getNumOperands() == 1);
@@ -1300,20 +1306,22 @@ void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw
   // Check whether can generate SIMD.js swizzle or shuffle.
   std::string A = getValueAsStr(SVI->getOperand(0));
   std::string B = getValueAsStr(SVI->getOperand(1));
-  int Mask0 = SVI->getMaskValue(0);
-  int Mask1 = SVI->getMaskValue(1);
-  int Mask2 = SVI->getMaskValue(2);
-  int Mask3 = SVI->getMaskValue(3);
+  int OpNumElements = cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+  int ResultNumElements = SVI->getType()->getNumElements();
+  int Mask0 = ResultNumElements > 0 ? SVI->getMaskValue(0) : -1;
+  int Mask1 = ResultNumElements > 1 ? SVI->getMaskValue(1) : -1;
+  int Mask2 = ResultNumElements > 2 ? SVI->getMaskValue(2) : -1;
+  int Mask3 = ResultNumElements > 3 ? SVI->getMaskValue(3) : -1;
   bool swizzleA = false;
   bool swizzleB = false;
-  if ((Mask0 < 4) && (Mask1 < 4) &&
-      (Mask2 < 4) && (Mask3 < 4)) {
+  if ((Mask0 < OpNumElements) && (Mask1 < OpNumElements) &&
+      (Mask2 < OpNumElements) && (Mask3 < OpNumElements)) {
     swizzleA = true;
   }
-  if ((Mask0 < 0 || (Mask0 >= 4 && Mask0 < 8)) &&
-      (Mask1 < 0 || (Mask1 >= 4 && Mask1 < 8)) &&
-      (Mask2 < 0 || (Mask2 >= 4 && Mask2 < 8)) &&
-      (Mask3 < 0 || (Mask3 >= 4 && Mask3 < 8))) {
+  if ((Mask0 < 0 || (Mask0 >= OpNumElements && Mask0 < OpNumElements * 2)) &&
+      (Mask1 < 0 || (Mask1 >= OpNumElements && Mask1 < OpNumElements * 2)) &&
+      (Mask2 < 0 || (Mask2 >= OpNumElements && Mask2 < OpNumElements * 2)) &&
+      (Mask3 < 0 || (Mask3 >= OpNumElements && Mask3 < OpNumElements * 2))) {
     swizzleB = true;
   }
   assert(!(swizzleA && swizzleB));
@@ -1324,18 +1332,22 @@ void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw
     } else {
       Code << "SIMD_float32x4_swizzle(" << T;
     }
-    for (unsigned int i = 0; i < 4; i++) {
+    int i = 0;
+    for (; i < ResultNumElements; ++i) {
       Code << ", ";
       int Mask = SVI->getMaskValue(i);
       if (Mask < 0) {
         Code << 0;
-      } else if (Mask < 4) {
+      } else if (Mask < OpNumElements) {
         Code << Mask;
       } else {
-        assert(Mask < 8);
-        Code << (Mask-4);
+        assert(Mask < OpNumElements * 2);
+        Code << (Mask-OpNumElements);
       }
     }
+    for (; i < 4; ++i) {
+      Code << ", 0";
+    }
     Code << ")";
     return;
   }
@@ -1354,7 +1366,10 @@ void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw
   for (unsigned int i = 0; i < Indices.size(); ++i) {
     if (i != 0)
       Code << ", ";
-    Code << Indices[i];
+    int Mask = Indices[i];
+    if (Mask >= OpNumElements)
+        Mask = Mask - OpNumElements + 4;
+    Code << Mask;
   }
 
   Code << ")";
@@ -1626,11 +1641,15 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
         const LoadInst *LI = cast<LoadInst>(I);
         const Value *P = LI->getPointerOperand();
         std::string PS = getValueAsStr(P);
+
+        // Determine if this is a partial store.
+        std::string Part = (std::string[]) { "X", "XY", "XYZ", "" }[VT->getNumElements() - 1];
+
         Code << getAssignIfNeeded(I);
         if (VT->getElementType()->isIntegerTy()) {
-          Code << "SIMD_int32x4_load(HEAPU8, " << PS << ")";
+          Code << "SIMD_int32x4_load" << Part << "(HEAPU8, " << PS << ")";
         } else {
-          Code << "SIMD_float32x4_load(HEAPU8, " << PS << ")";
+          Code << "SIMD_float32x4_load" << Part << "(HEAPU8, " << PS << ")";
         }
         break;
       }
@@ -1666,10 +1685,14 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
       std::string PS = getOpName(P);
       std::string VS = getValueAsStr(SI->getValueOperand());
       Code << getAdHocAssign(PS, P->getType()) << getValueAsStr(P) << ';';
+
+      // Determine if this is a partial store.
+      std::string Part = (std::string[]) { "X", "XY", "XYZ", "" }[VT->getNumElements() - 1];
+
       if (VT->getElementType()->isIntegerTy()) {
-        Code << "SIMD_int32x4_store(HEAPU8, " << PS << ", " << VS << ")";
+        Code << "SIMD_int32x4_store" << Part << "(HEAPU8, " << PS << ", " << VS << ")";
       } else {
-        Code << "SIMD_float32x4_store(HEAPU8, " << PS << ", " << VS << ")";
+        Code << "SIMD_float32x4_store" << Part << "(HEAPU8, " << PS << ", " << VS << ")";
       }
       return true;
     } else if (Operator::getOpcode(I) == Instruction::ExtractElement) {
diff --git a/test/CodeGen/JS/simd-loadstore.ll b/test/CodeGen/JS/simd-loadstore.ll
new file mode 100644
index 00000000000..60ed1679512
--- /dev/null
+++ b/test/CodeGen/JS/simd-loadstore.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+
+; CHECK: function _fx1($p) {
+; CHECK:  $p = $p|0;
+; CHECK:  var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0)
+; CHECK:  $t = SIMD_float32x4_loadX(HEAPU8, $p);
+; CHECK:  $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+0.5),Math_fround(+0),Math_fround(+0),Math_fround(+0)));
+; CHECK:  $q = $p;SIMD_float32x4_storeX(HEAPU8, $q, $s);
+; CHECK:  return;
+; CHECK: }
+define void @fx1(i8* %p) {
+    %q = bitcast i8* %p to <1 x float>*
+    %t = load <1 x float>* %q
+    %s = fadd <1 x float> %t, <float 0.5>
+    store <1 x float> %s, <1 x float>* %q
+    ret void
+}
+
+; CHECK: function _fx2($p) {
+; CHECK:  $p = $p|0;
+; CHECK:  var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0)
+; CHECK:  $t = SIMD_float32x4_loadXY(HEAPU8, $p);
+; CHECK:  $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+3.5),Math_fround(+7.5),Math_fround(+0),Math_fround(+0)));
+; CHECK:  $q = $p;SIMD_float32x4_storeXY(HEAPU8, $q, $s);
+; CHECK:  return;
+; CHECK: }
+define void @fx2(i8* %p) {
+    %q = bitcast i8* %p to <2 x float>*
+    %t = load <2 x float>* %q
+    %s = fadd <2 x float> %t, <float 3.5, float 7.5>
+    store <2 x float> %s, <2 x float>* %q
+    ret void
+}
+
+; CHECK: function _fx3($p) {
+; CHECK:  $p = $p|0;
+; CHECK:  var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0)
+; CHECK:  $t = SIMD_float32x4_loadXYZ(HEAPU8, $p);
+; CHECK:  $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+1.5),Math_fround(+4.5),Math_fround(+6.5),Math_fround(+0)));
+; CHECK:  $q = $p;SIMD_float32x4_storeXYZ(HEAPU8, $q, $s);
+; CHECK:  return;
+; CHECK: }
+define void @fx3(i8* %p) {
+    %q = bitcast i8* %p to <3 x float>*
+    %t = load <3 x float>* %q
+    %s = fadd <3 x float> %t, <float 1.5, float 4.5, float 6.5>
+    store <3 x float> %s, <3 x float>* %q
+    ret void
+}
+
+; CHECK: function _fx4($p) {
+; CHECK:  $p = $p|0;
+; CHECK:  var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0)
+; CHECK:  $t = SIMD_float32x4_load(HEAPU8, $p);
+; CHECK:  $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+9.5),Math_fround(+5.5),Math_fround(+1.5),Math_fround(+-3.5)));
+; CHECK:  $q = $p;SIMD_float32x4_store(HEAPU8, $q, $s);
+; CHECK:  return;
+; CHECK: }
+define void @fx4(i8* %p) {
+    %q = bitcast i8* %p to <4 x float>*
+    %t = load <4 x float>* %q
+    %s = fadd <4 x float> %t, <float 9.5, float 5.5, float 1.5, float -3.5>
+    store <4 x float> %s, <4 x float>* %q
+    ret void
+}

From 99db845fd89b177097670e2a26839635ba0c2dee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= <jujjyl@gmail.com>
Date: Thu, 18 Dec 2014 16:01:50 -0500
Subject: [PATCH 06/15] Fix Windows build on Visual Studio 2010 by removing use
 of nonstandard language extension.

---
 lib/Target/JSBackend/JSBackend.cpp | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index 51610aa72ee..a78fd7af223 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -1642,8 +1642,13 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
         const Value *P = LI->getPointerOperand();
         std::string PS = getValueAsStr(P);
 
-        // Determine if this is a partial store.
-        std::string Part = (std::string[]) { "X", "XY", "XYZ", "" }[VT->getNumElements() - 1];
+        // Determine if this is a partial load.
+        static const std::string partialAccess[4] = { "X", "XY", "XYZ", "" };
+        if (VT->getNumElements() < 1 || VT->getNumElements() > 4) {
+          error("invalid number of lanes in SIMD operation!");
+          break;
+        }
+        const std::string &Part = partialAccess[VT->getNumElements() - 1];
 
         Code << getAssignIfNeeded(I);
         if (VT->getElementType()->isIntegerTy()) {
@@ -1687,7 +1692,12 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
       Code << getAdHocAssign(PS, P->getType()) << getValueAsStr(P) << ';';
 
       // Determine if this is a partial store.
-      std::string Part = (std::string[]) { "X", "XY", "XYZ", "" }[VT->getNumElements() - 1];
+      static const std::string partialAccess[4] = { "X", "XY", "XYZ", "" };
+      if (VT->getNumElements() < 1 || VT->getNumElements() > 4) {
+        error("invalid number of lanes in SIMD operation!");
+        return false;
+      }
+      const std::string &Part = partialAccess[VT->getNumElements() - 1];
 
       if (VT->getElementType()->isIntegerTy()) {
         Code << "SIMD_int32x4_store" << Part << "(HEAPU8, " << PS << ", " << VS << ")";

From 68ac5906e588ba68713c3866fc05e24d1bf59813 Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Thu, 18 Dec 2014 11:41:01 -0800
Subject: [PATCH 07/15] Fix codegen for SIMD select with scalar condition.

---
 lib/Target/JSBackend/JSBackend.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index a78fd7af223..d5baaba2743 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -1596,11 +1596,15 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
       case Instruction::Select:
         // Since we represent vectors of i1 as vectors of sign extended wider integers,
         // selecting on them is just an elementwise select.
-        if (cast<VectorType>(I->getType())->getElementType()->isIntegerTy()) {
-          Code << getAssignIfNeeded(I) << "SIMD_int32x4_select(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << "," << getValueAsStr(I->getOperand(2)) << ")"; break;
-        } else {
-          Code << getAssignIfNeeded(I) << "SIMD_float32x4_select(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << "," << getValueAsStr(I->getOperand(2)) << ")"; break;
+        if (isa<VectorType>(I->getOperand(0)->getType())) {
+          if (cast<VectorType>(I->getType())->getElementType()->isIntegerTy()) {
+            Code << getAssignIfNeeded(I) << "SIMD_int32x4_select(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << "," << getValueAsStr(I->getOperand(2)) << ")"; break;
+          } else {
+            Code << getAssignIfNeeded(I) << "SIMD_float32x4_select(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << "," << getValueAsStr(I->getOperand(2)) << ")"; break;
+          }
+          return true;
         }
+        // Otherwise we have a scalar condition, so it's a ?: operator.
         return false;
       case Instruction::FAdd: Code << getAssignIfNeeded(I) << "SIMD_float32x4_add(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break;
       case Instruction::FMul: Code << getAssignIfNeeded(I) << "SIMD_float32x4_mul(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break;

From edfd9d3f6d91a2ee946a2c88082710e5acf2ea22 Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Thu, 18 Dec 2014 11:41:28 -0800
Subject: [PATCH 08/15] Fix SIMD shuffle with undef mask elements.

---
 lib/Target/JSBackend/JSBackend.cpp |  7 +++++--
 test/CodeGen/JS/simd-select.ll     | 26 ++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index d5baaba2743..206e956382d 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -1368,8 +1368,11 @@ void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw
       Code << ", ";
     int Mask = Indices[i];
     if (Mask >= OpNumElements)
-        Mask = Mask - OpNumElements + 4;
-    Code << Mask;
+      Mask = Mask - OpNumElements + 4;
+    if (Mask < 0)
+      Code << 0;
+    else
+      Code << Mask;
   }
 
   Code << ")";
diff --git a/test/CodeGen/JS/simd-select.ll b/test/CodeGen/JS/simd-select.ll
index d28347feac9..7547b199049 100644
--- a/test/CodeGen/JS/simd-select.ll
+++ b/test/CodeGen/JS/simd-select.ll
@@ -28,3 +28,29 @@ entry:
   %cmp = select <4 x i1> %cond, <4 x float> %a, <4 x float> %b
   ret <4 x float> %cmp
 }
+
+; CHECK: function _test2($a,$b,$cond) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  $cond = $cond|0;
+; CHECK:  $cmp = $cond ? $a : $b;
+; CHECK:  return (SIMD_int32x4($cmp));
+; CHECK: }
+define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b, i1 %cond) nounwind {
+entry:
+  %cmp = select i1 %cond, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %cmp
+}
+
+; CHECK: function _test3($a,$b,$cond) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  $cond = $cond|0;
+; CHECK:  $cmp = $cond ? $a : $b;
+; CHECK:  return (SIMD_float32x4($cmp));
+; CHECK: }
+define <4 x float> @test3(<4 x float> %a, <4 x float> %b, i1 %cond) nounwind {
+entry:
+  %cmp = select i1 %cond, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %cmp
+}

From 112d26e53e79d53d99dc18f2823cd2a343847139 Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Thu, 18 Dec 2014 11:42:11 -0800
Subject: [PATCH 09/15] Remove the SIMD signMask polyfill hack.

---
 lib/Target/JSBackend/CallHandlers.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/Target/JSBackend/CallHandlers.h b/lib/Target/JSBackend/CallHandlers.h
index 0f2cbabf20d..f3991dad1ea 100644
--- a/lib/Target/JSBackend/CallHandlers.h
+++ b/lib/Target/JSBackend/CallHandlers.h
@@ -454,8 +454,7 @@ DEF_CALL_HANDLER(llvm_cttz_i32, {
 
 // vector ops
 DEF_CALL_HANDLER(emscripten_float32x4_signmask, {
-  // TODO: use signMaskPolyfill explicitly for now, until the builtin signMask is ready.
-  return getAssign(CI) + getValueAsStr(CI->getOperand(0)) + ".signMaskPolyfill";
+  return getAssign(CI) + getValueAsStr(CI->getOperand(0)) + ".signMask";
 })
 
 #define DEF_BUILTIN_HANDLER(name, to) \

From bf0e35737716b5956cdcfcbced0576fac5bb0613 Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Thu, 18 Dec 2014 12:10:06 -0800
Subject: [PATCH 10/15] Add tests for int32x4, float32x4, int32x3, and
 float32x3 shuffles.

---
 test/CodeGen/JS/simd-shuffle.ll | 524 ++++++++++++++++++++++++++++++++
 1 file changed, 524 insertions(+)
 create mode 100644 test/CodeGen/JS/simd-shuffle.ll

diff --git a/test/CodeGen/JS/simd-shuffle.ll b/test/CodeGen/JS/simd-shuffle.ll
new file mode 100644
index 00000000000..3e7667c56f4
--- /dev/null
+++ b/test/CodeGen/JS/simd-shuffle.ll
@@ -0,0 +1,524 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+
+; CHECK: function _splat_int32x4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @splat_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32><i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _swizzle_int32x4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 3, 1, 2);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @swizzle_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32><i32 0, i32 3, i32 1, i32 2>
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _swizzlehi_int32x4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($b, 2, 1, 3, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @swizzlehi_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32><i32 6, i32 5, i32 7, i32 4>
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _shuffleXY_float32x4to3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 7, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @shuffleXY_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32><i32 7, i32 0, i32 undef>
+  ret <3 x float> %sel
+}
+
+; CHECK: function _shuffle_int32x4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 7, 0, 5, 3);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @shuffle_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32><i32 7, i32 0, i32 5, i32 3>
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _shuffleXY_int32x4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 7, 0, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @shuffleXY_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32><i32 7, i32 0, i32 undef, i32 undef>
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _splat_int32x3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @splat_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32><i32 0, i32 0, i32 0>
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _swizzle_int32x3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 2, 1, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @swizzle_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32><i32 0, i32 2, i32 1>
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _swizzlehi_int32x3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($b, 0, 2, 1, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @swizzlehi_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32><i32 3, i32 5, i32 4>
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _shuffle_int32x3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 6, 0, 5);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @shuffle_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32><i32 5, i32 0, i32 4>
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _shuffleXY_int32x3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 6, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @shuffleXY_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32><i32 5, i32 0, i32 undef>
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _splat_int32x3to4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @splat_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32><i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _swizzle_int32x3to4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 2, 1, 2);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @swizzle_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32><i32 0, i32 2, i32 1, i32 2>
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _swizzlehi_int32x3to4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($b, 2, 1, 0, 2);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @swizzlehi_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32><i32 5, i32 4, i32 3, i32 5>
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _shuffle_int32x3to4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 6, 0, 5, 2);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @shuffle_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32><i32 5, i32 0, i32 4, i32 2>
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _shuffleXY_int32x3to4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 6, 0, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @shuffleXY_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32><i32 5, i32 0, i32 undef, i32 undef>
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _splat_int32x4to3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @splat_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32><i32 0, i32 0, i32 0>
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _swizzle_int32x4to3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 3, 1, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @swizzle_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32><i32 0, i32 3, i32 1>
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _swizzlehi_int32x4to3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($b, 2, 1, 3, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @swizzlehi_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32><i32 6, i32 5, i32 7>
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _shuffle_int32x4to3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 7, 0, 5);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @shuffle_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32><i32 7, i32 0, i32 5>
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _shuffleXY_int32x4to3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 7, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @shuffleXY_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32><i32 7, i32 0, i32 undef>
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _splat_float32x4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @splat_float32x4(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 0, i32 0, i32 0>
+  ret <4 x float> %sel
+}
+
+; CHECK: function _swizzle_float32x4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 3, 1, 2);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @swizzle_float32x4(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 3, i32 1, i32 2>
+  ret <4 x float> %sel
+}
+
+; CHECK: function _swizzlehi_float32x4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($b, 2, 1, 3, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @swizzlehi_float32x4(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 6, i32 5, i32 7, i32 4>
+  ret <4 x float> %sel
+}
+
+; CHECK: function _shuffle_float32x4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 7, 0, 5, 3);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @shuffle_float32x4(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 7, i32 0, i32 5, i32 3>
+  ret <4 x float> %sel
+}
+
+; CHECK: function _shuffleXY_float32x4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 7, 0, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @shuffleXY_float32x4(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 7, i32 0, i32 undef, i32 undef>
+  ret <4 x float> %sel
+}
+
+; CHECK: function _splat_float32x3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @splat_float32x3(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32><i32 0, i32 0, i32 0>
+  ret <3 x float> %sel
+}
+
+; CHECK: function _swizzle_float32x3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 2, 1, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @swizzle_float32x3(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32><i32 0, i32 2, i32 1>
+  ret <3 x float> %sel
+}
+
+; CHECK: function _swizzlehi_float32x3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($b, 0, 2, 1, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @swizzlehi_float32x3(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32><i32 3, i32 5, i32 4>
+  ret <3 x float> %sel
+}
+
+; CHECK: function _shuffle_float32x3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 6, 0, 5);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @shuffle_float32x3(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32><i32 5, i32 0, i32 4>
+  ret <3 x float> %sel
+}
+
+; CHECK: function _shuffleXY_float32x3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 6, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @shuffleXY_float32x3(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32><i32 5, i32 0, i32 undef>
+  ret <3 x float> %sel
+}
+
+; CHECK: function _splat_float32x3to4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @splat_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32><i32 0, i32 0, i32 0, i32 0>
+  ret <4 x float> %sel
+}
+
+; CHECK: function _swizzle_float32x3to4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 2, 1, 2);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @swizzle_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32><i32 0, i32 2, i32 1, i32 2>
+  ret <4 x float> %sel
+}
+
+; CHECK: function _swizzlehi_float32x3to4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($b, 2, 1, 0, 2);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @swizzlehi_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32><i32 5, i32 4, i32 3, i32 5>
+  ret <4 x float> %sel
+}
+
+; CHECK: function _shuffle_float32x3to4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 6, 0, 5, 2);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @shuffle_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32><i32 5, i32 0, i32 4, i32 2>
+  ret <4 x float> %sel
+}
+
+; CHECK: function _shuffleXY_float32x3to4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 6, 0, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @shuffleXY_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32><i32 5, i32 0, i32 undef, i32 undef>
+  ret <4 x float> %sel
+}
+
+; CHECK: function _splat_float32x4to3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @splat_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32><i32 0, i32 0, i32 0>
+  ret <3 x float> %sel
+}
+
+; CHECK: function _swizzle_float32x4to3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 3, 1, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @swizzle_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32><i32 0, i32 3, i32 1>
+  ret <3 x float> %sel
+}
+
+; CHECK: function _swizzlehi_float32x4to3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($b, 2, 1, 3, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @swizzlehi_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32><i32 6, i32 5, i32 7>
+  ret <3 x float> %sel
+}
+
+; CHECK: function _shuffle_float32x4to3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 7, 0, 5);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @shuffle_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32><i32 7, i32 0, i32 5>
+  ret <3 x float> %sel
+}

From f0413d97f47c8a61dc1ee12f674f6dd876363907 Mon Sep 17 00:00:00 2001
From: Alon Zakai <alonzakai@gmail.com>
Date: Thu, 18 Dec 2014 17:37:12 -0800
Subject: [PATCH 11/15] Clear PhiBlockChanges in i64 lowering, since they are
 per-function; fixes emscripten #3088

---
 lib/Target/JSBackend/ExpandI64.cpp |   1 +
 test/CodeGen/JS/blockchanges.ll    | 411 +++++++++++++++++++++++++++++
 2 files changed, 412 insertions(+)
 create mode 100644 test/CodeGen/JS/blockchanges.ll

diff --git a/lib/Target/JSBackend/ExpandI64.cpp b/lib/Target/JSBackend/ExpandI64.cpp
index 49296107c9e..f40ac691dcc 100644
--- a/lib/Target/JSBackend/ExpandI64.cpp
+++ b/lib/Target/JSBackend/ExpandI64.cpp
@@ -1139,6 +1139,7 @@ bool ExpandI64::runOnModule(Module &M) {
         Phi->addIncoming(Phi->getIncomingValue(Index), Change.NewBB);
       }
     }
+    PhiBlockChanges.clear();
 
     // We only visited blocks found by a DFS walk from the entry, so we haven't
     // visited any unreachable blocks, and they may still contain illegal
diff --git a/test/CodeGen/JS/blockchanges.ll b/test/CodeGen/JS/blockchanges.ll
new file mode 100644
index 00000000000..b93e6688c40
--- /dev/null
+++ b/test/CodeGen/JS/blockchanges.ll
@@ -0,0 +1,411 @@
+; RUN: llc < %s
+
+; regression check for emscripten #3088 - we were not clearing PhiBlockChanges between functions in i64 lowering
+
+; ModuleID = 'waka.bc'
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+
+%"class.ZenLib::uint128" = type <{ i64, i64 }>
+
+@.str = private unnamed_addr constant [15 x i8] c"hello, world!\0A\00", align 1
+
+@.str368164 = external hidden unnamed_addr constant [10 x i8], align 1
+@.str398167 = external hidden unnamed_addr constant [6 x i8], align 1
+@.str718199 = external hidden unnamed_addr constant [9 x i8], align 1
+@.str738201 = external hidden unnamed_addr constant [21 x i8], align 1
+@.str748202 = external hidden unnamed_addr constant [26 x i8], align 1
+@.str758203 = external hidden unnamed_addr constant [21 x i8], align 1
+@.str768204 = external hidden unnamed_addr constant [8 x i8], align 1
+@.str778205 = external hidden unnamed_addr constant [14 x i8], align 1
+@.str788206 = external hidden unnamed_addr constant [22 x i8], align 1
+@.str798207 = external hidden unnamed_addr constant [25 x i8], align 1
+@.str808208 = external hidden unnamed_addr constant [24 x i8], align 1
+@.str818209 = external hidden unnamed_addr constant [20 x i8], align 1
+@.str828210 = external hidden unnamed_addr constant [34 x i8], align 1
+@.str838211 = external hidden unnamed_addr constant [31 x i8], align 1
+@.str848212 = external hidden unnamed_addr constant [29 x i8], align 1
+@.str858213 = external hidden unnamed_addr constant [44 x i8], align 1
+@.str868214 = external hidden unnamed_addr constant [12 x i8], align 1
+@.str908218 = external hidden unnamed_addr constant [21 x i8], align 1
+@.str918219 = external hidden unnamed_addr constant [8 x i8], align 1
+@.str928220 = external hidden unnamed_addr constant [6 x i8], align 1
+@.str9210864 = external hidden unnamed_addr constant [5 x i8], align 1
+@.str514367 = external hidden unnamed_addr constant [5 x i8], align 1
+@.str214409 = external hidden unnamed_addr constant [4 x i8], align 1
+@.str20216493 = external hidden unnamed_addr constant [3 x i8], align 1
+@.str2017231 = external hidden unnamed_addr constant [11 x i8], align 1
+@.str2317234 = external hidden unnamed_addr constant [14 x i8], align 1
+@.str2417235 = external hidden unnamed_addr constant [4 x i8], align 1
+@.str2717238 = external hidden unnamed_addr constant [5 x i8], align 1
+@.str3217243 = external hidden unnamed_addr constant [4 x i8], align 1
+@.str1717689 = external hidden unnamed_addr constant [5 x i8], align 1
+@.str2104 = external hidden unnamed_addr constant [1 x i8], align 1
+
+; Function Attrs: nounwind readonly
+define hidden i8* @_ZN12MediaInfoLib22Mxf_EssenceCompressionEN6ZenLib7uint128E(%"class.ZenLib::uint128"* nocapture readonly %EssenceCompression) #0 {
+entry:
+  %hi = getelementptr inbounds %"class.ZenLib::uint128"* %EssenceCompression, i32 0, i32 1
+  %0 = load i64* %hi, align 1, !tbaa !2
+  %and = and i64 %0, -256
+  %cmp = icmp eq i64 %and, 436333716306985216
+  br i1 %cmp, label %lor.lhs.false, label %return
+
+lor.lhs.false:                                    ; preds = %entry
+  %lo = getelementptr inbounds %"class.ZenLib::uint128"* %EssenceCompression, i32 0, i32 0
+  %1 = load i64* %lo, align 1, !tbaa !7
+  %and1 = and i64 %1, -72057594037927936
+  switch i64 %and1, label %return [
+    i64 288230376151711744, label %if.end
+    i64 1008806316530991104, label %if.end
+  ]
+
+if.end:                                           ; preds = %lor.lhs.false, %lor.lhs.false
+  %shr = lshr i64 %1, 56
+  %conv = trunc i64 %shr to i32
+  %and10 = lshr i64 %1, 48
+  %and14 = lshr i64 %1, 40
+  %and18 = lshr i64 %1, 32
+  %conv20 = trunc i64 %and18 to i32
+  %and22 = lshr i64 %1, 24
+  %and26 = lshr i64 %1, 16
+  %conv28 = trunc i64 %and26 to i32
+  %and30 = lshr i64 %1, 8
+  %conv32 = trunc i64 %and30 to i32
+  switch i32 %conv, label %return [
+    i32 4, label %sw.bb
+    i32 14, label %sw.bb112
+  ]
+
+sw.bb:                                            ; preds = %if.end
+  %conv12 = trunc i64 %and10 to i32
+  %conv34 = and i32 %conv12, 255
+  switch i32 %conv34, label %return [
+    i32 1, label %sw.bb35
+    i32 2, label %sw.bb64
+  ]
+
+sw.bb35:                                          ; preds = %sw.bb
+  %conv36 = and i64 %and14, 255
+  %cond12 = icmp eq i64 %conv36, 2
+  br i1 %cond12, label %sw.bb37, label %return
+
+sw.bb37:                                          ; preds = %sw.bb35
+  %conv38 = and i32 %conv20, 255
+  switch i32 %conv38, label %return [
+    i32 1, label %sw.bb39
+    i32 2, label %sw.bb42
+  ]
+
+sw.bb39:                                          ; preds = %sw.bb37
+  %conv40 = and i64 %and22, 255
+  %cond14 = icmp eq i64 %conv40, 1
+  %. = select i1 %cond14, i8* getelementptr inbounds ([4 x i8]* @.str214409, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0)
+  br label %return
+
+sw.bb42:                                          ; preds = %sw.bb37
+  %2 = trunc i64 %and22 to i32
+  %conv43 = and i32 %2, 255
+  switch i32 %conv43, label %sw.default61 [
+    i32 1, label %sw.bb44
+    i32 2, label %return
+    i32 3, label %sw.bb56
+    i32 113, label %sw.bb60
+  ]
+
+sw.bb44:                                          ; preds = %sw.bb42
+  %conv45 = and i32 %conv28, 255
+  switch i32 %conv45, label %sw.default54 [
+    i32 0, label %return
+    i32 1, label %return
+    i32 2, label %return
+    i32 3, label %return
+    i32 4, label %return
+    i32 17, label %return
+    i32 32, label %sw.bb52
+    i32 48, label %sw.bb53
+    i32 49, label %sw.bb53
+    i32 50, label %sw.bb53
+    i32 51, label %sw.bb53
+    i32 52, label %sw.bb53
+    i32 53, label %sw.bb53
+    i32 54, label %sw.bb53
+    i32 55, label %sw.bb53
+    i32 56, label %sw.bb53
+    i32 57, label %sw.bb53
+    i32 58, label %sw.bb53
+    i32 59, label %sw.bb53
+    i32 60, label %sw.bb53
+    i32 61, label %sw.bb53
+    i32 62, label %sw.bb53
+    i32 63, label %sw.bb53
+  ]
+
+sw.bb52:                                          ; preds = %sw.bb44
+  br label %return
+
+sw.bb53:                                          ; preds = %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44
+  br label %return
+
+sw.default54:                                     ; preds = %sw.bb44
+  br label %return
+
+sw.bb56:                                          ; preds = %sw.bb42
+  %conv57 = and i64 %and26, 255
+  %cond13 = icmp eq i64 %conv57, 1
+  %.35 = select i1 %cond13, i8* getelementptr inbounds ([10 x i8]* @.str368164, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0)
+  br label %return
+
+sw.bb60:                                          ; preds = %sw.bb42
+  br label %return
+
+sw.default61:                                     ; preds = %sw.bb42
+  br label %return
+
+sw.bb64:                                          ; preds = %sw.bb
+  %conv65 = and i64 %and14, 255
+  %cond9 = icmp eq i64 %conv65, 2
+  br i1 %cond9, label %sw.bb66, label %return
+
+sw.bb66:                                          ; preds = %sw.bb64
+  %conv67 = and i32 %conv20, 255
+  switch i32 %conv67, label %return [
+    i32 1, label %sw.bb68
+    i32 2, label %sw.bb75
+  ]
+
+sw.bb68:                                          ; preds = %sw.bb66
+  %3 = trunc i64 %and22 to i32
+  %conv69 = and i32 %3, 255
+  switch i32 %conv69, label %sw.default74 [
+    i32 0, label %return
+    i32 1, label %return
+    i32 126, label %return
+    i32 127, label %return
+  ]
+
+sw.default74:                                     ; preds = %sw.bb68
+  br label %return
+
+sw.bb75:                                          ; preds = %sw.bb66
+  %conv76 = and i64 %and22, 255
+  %cond10 = icmp eq i64 %conv76, 3
+  br i1 %cond10, label %sw.bb77, label %return
+
+sw.bb77:                                          ; preds = %sw.bb75
+  %conv78 = and i32 %conv28, 255
+  switch i32 %conv78, label %return [
+    i32 1, label %sw.bb79
+    i32 2, label %sw.bb84
+    i32 3, label %sw.bb92
+    i32 4, label %sw.bb96
+  ]
+
+sw.bb79:                                          ; preds = %sw.bb77
+  %conv80 = and i32 %conv32, 255
+  switch i32 %conv80, label %sw.default83 [
+    i32 1, label %return
+    i32 16, label %sw.bb82
+  ]
+
+sw.bb82:                                          ; preds = %sw.bb79
+  br label %return
+
+sw.default83:                                     ; preds = %sw.bb79
+  br label %return
+
+sw.bb84:                                          ; preds = %sw.bb77
+  %conv85 = and i32 %conv32, 255
+  switch i32 %conv85, label %sw.default91 [
+    i32 1, label %return
+    i32 4, label %sw.bb87
+    i32 5, label %sw.bb88
+    i32 6, label %sw.bb89
+    i32 28, label %sw.bb90
+  ]
+
+sw.bb87:                                          ; preds = %sw.bb84
+  br label %return
+
+sw.bb88:                                          ; preds = %sw.bb84
+  br label %return
+
+sw.bb89:                                          ; preds = %sw.bb84
+  br label %return
+
+sw.bb90:                                          ; preds = %sw.bb84
+  br label %return
+
+sw.default91:                                     ; preds = %sw.bb84
+  br label %return
+
+sw.bb92:                                          ; preds = %sw.bb77
+  %conv93 = and i64 %and30, 255
+  %cond11 = icmp eq i64 %conv93, 1
+  %.36 = select i1 %cond11, i8* getelementptr inbounds ([14 x i8]* @.str778205, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0)
+  br label %return
+
+sw.bb96:                                          ; preds = %sw.bb77
+  %conv97 = and i32 %conv32, 255
+  switch i32 %conv97, label %sw.default106 [
+    i32 1, label %return
+    i32 2, label %sw.bb99
+    i32 3, label %sw.bb100
+    i32 4, label %sw.bb101
+    i32 5, label %sw.bb102
+    i32 6, label %sw.bb103
+    i32 7, label %sw.bb104
+    i32 8, label %sw.bb105
+  ]
+
+sw.bb99:                                          ; preds = %sw.bb96
+  br label %return
+
+sw.bb100:                                         ; preds = %sw.bb96
+  br label %return
+
+sw.bb101:                                         ; preds = %sw.bb96
+  br label %return
+
+sw.bb102:                                         ; preds = %sw.bb96
+  br label %return
+
+sw.bb103:                                         ; preds = %sw.bb96
+  br label %return
+
+sw.bb104:                                         ; preds = %sw.bb96
+  br label %return
+
+sw.bb105:                                         ; preds = %sw.bb96
+  br label %return
+
+sw.default106:                                    ; preds = %sw.bb96
+  br label %return
+
+sw.bb112:                                         ; preds = %if.end
+  %4 = trunc i64 %and10 to i32
+  %conv113 = and i32 %4, 255
+  switch i32 %conv113, label %return [
+    i32 4, label %sw.bb114
+    i32 6, label %sw.bb127
+  ]
+
+sw.bb114:                                         ; preds = %sw.bb112
+  %conv115 = and i64 %and14, 255
+  %cond5 = icmp eq i64 %conv115, 2
+  %conv117 = and i64 %and18, 255
+  %cond6 = icmp eq i64 %conv117, 1
+  %or.cond = and i1 %cond5, %cond6
+  %conv119 = and i64 %and22, 255
+  %cond7 = icmp eq i64 %conv119, 2
+  %or.cond39 = and i1 %or.cond, %cond7
+  br i1 %or.cond39, label %sw.bb120, label %return
+
+sw.bb120:                                         ; preds = %sw.bb114
+  %conv121 = and i64 %and26, 255
+  %cond8 = icmp eq i64 %conv121, 4
+  %.37 = select i1 %cond8, i8* getelementptr inbounds ([5 x i8]* @.str514367, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0)
+  br label %return
+
+sw.bb127:                                         ; preds = %sw.bb112
+  %conv128 = and i64 %and14, 255
+  %cond = icmp eq i64 %conv128, 4
+  %conv130 = and i64 %and18, 255
+  %cond1 = icmp eq i64 %conv130, 1
+  %or.cond40 = and i1 %cond, %cond1
+  %conv132 = and i64 %and22, 255
+  %cond2 = icmp eq i64 %conv132, 2
+  %or.cond41 = and i1 %or.cond40, %cond2
+  %conv134 = and i64 %and26, 255
+  %cond3 = icmp eq i64 %conv134, 4
+  %or.cond42 = and i1 %or.cond41, %cond3
+  br i1 %or.cond42, label %sw.bb135, label %return
+
+sw.bb135:                                         ; preds = %sw.bb127
+  %conv136 = and i64 %and30, 255
+  %cond4 = icmp eq i64 %conv136, 2
+  %.38 = select i1 %cond4, i8* getelementptr inbounds ([12 x i8]* @.str868214, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0)
+  br label %return
+
+return:                                           ; preds = %sw.bb135, %sw.bb127, %sw.bb120, %sw.bb114, %sw.bb112, %sw.default106, %sw.bb105, %sw.bb104, %sw.bb103, %sw.bb102, %sw.bb101, %sw.bb100, %sw.bb99, %sw.bb96, %sw.bb92, %sw.default91, %sw.bb90, %sw.bb89, %sw.bb88, %sw.bb87, %sw.bb84, %sw.default83, %sw.bb82, %sw.bb79, %sw.bb77, %sw.bb75, %sw.default74, %sw.bb68, %sw.bb68, %sw.bb68, %sw.bb68, %sw.bb66, %sw.bb64, %sw.default61, %sw.bb60, %sw.bb56, %sw.default54, %sw.bb53, %sw.bb52, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb42, %sw.bb39, %sw.bb37, %sw.bb35, %sw.bb, %if.end, %lor.lhs.false, %entry
+  %retval.0 = phi i8* [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default106 ], [ getelementptr inbounds ([44 x i8]* @.str858213, i32 0, i32 0), %sw.bb105 ], [ getelementptr inbounds ([29 x i8]* @.str848212, i32 0, i32 0), %sw.bb104 ], [ getelementptr inbounds ([31 x i8]* @.str838211, i32 0, i32 0), %sw.bb103 ], [ getelementptr inbounds ([34 x i8]* @.str828210, i32 0, i32 0), %sw.bb102 ], [ getelementptr inbounds ([20 x i8]* @.str818209, i32 0, i32 0), %sw.bb101 ], [ getelementptr inbounds ([24 x i8]* @.str808208, i32 0, i32 0), %sw.bb100 ], [ getelementptr inbounds ([25 x i8]* @.str798207, i32 0, i32 0), %sw.bb99 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default91 ], [ getelementptr inbounds ([8 x i8]* @.str768204, i32 0, i32 0), %sw.bb90 ], [ getelementptr inbounds ([21 x i8]* @.str758203, i32 0, i32 0), %sw.bb89 ], [ getelementptr inbounds ([26 x i8]* @.str748202, i32 0, i32 0), %sw.bb88 ], [ getelementptr inbounds ([21 x i8]* @.str738201, i32 0, i32 0), %sw.bb87 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default83 ], [ getelementptr inbounds ([9 x i8]* @.str718199, i32 0, i32 0), %sw.bb82 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default74 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default61 ], [ getelementptr inbounds ([5 x i8]* @.str514367, i32 0, i32 0), %sw.bb60 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default54 ], [ getelementptr inbounds ([4 x i8]* @.str2417235, i32 0, i32 0), %sw.bb53 ], [ getelementptr inbounds ([14 x i8]* @.str2317234, i32 0, i32 0), %sw.bb52 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %lor.lhs.false ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %entry ], [ %., %sw.bb39 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr 
inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([3 x i8]* @.str20216493, i32 0, i32 0), %sw.bb42 ], [ %.35, %sw.bb56 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb37 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb35 ], [ getelementptr inbounds ([4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([6 x i8]* @.str398167, i32 0, i32 0), %sw.bb79 ], [ getelementptr inbounds ([5 x i8]* @.str2717238, i32 0, i32 0), %sw.bb84 ], [ %.36, %sw.bb92 ], [ getelementptr inbounds ([22 x i8]* @.str788206, i32 0, i32 0), %sw.bb96 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb77 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb75 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb66 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb64 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb ], [ %.37, %sw.bb120 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb114 ], [ %.38, %sw.bb135 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb127 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb112 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %if.end ]
+  ret i8* %retval.0
+}
+
+; Function Attrs: nounwind readonly
+define hidden i8* @_ZN12MediaInfoLib27Mxf_Sequence_DataDefinitionEN6ZenLib7uint128E(%"class.ZenLib::uint128"* nocapture readonly %DataDefinition) #0 {
+entry:
+  %lo = getelementptr inbounds %"class.ZenLib::uint128"* %DataDefinition, i32 0, i32 0
+  %0 = load i64* %lo, align 1, !tbaa !7
+  %and = lshr i64 %0, 32
+  %conv = trunc i64 %and to i32
+  %and2 = lshr i64 %0, 24
+  %conv5 = and i32 %conv, 255
+  switch i32 %conv5, label %return [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb9
+  ]
+
+sw.bb:                                            ; preds = %entry
+  %conv4 = trunc i64 %and2 to i32
+  %conv6 = and i32 %conv4, 255
+  switch i32 %conv6, label %sw.default [
+    i32 1, label %return
+    i32 2, label %return
+    i32 3, label %return
+    i32 16, label %sw.bb8
+  ]
+
+sw.bb8:                                           ; preds = %sw.bb
+  br label %return
+
+sw.default:                                       ; preds = %sw.bb
+  br label %return
+
+sw.bb9:                                           ; preds = %entry
+  %1 = trunc i64 %and2 to i32
+  %conv10 = and i32 %1, 255
+  switch i32 %conv10, label %sw.default14 [
+    i32 1, label %return
+    i32 2, label %sw.bb12
+    i32 3, label %sw.bb13
+  ]
+
+sw.bb12:                                          ; preds = %sw.bb9
+  br label %return
+
+sw.bb13:                                          ; preds = %sw.bb9
+  br label %return
+
+sw.default14:                                     ; preds = %sw.bb9
+  br label %return
+
+return:                                           ; preds = %sw.default14, %sw.bb13, %sw.bb12, %sw.bb9, %sw.default, %sw.bb8, %sw.bb, %sw.bb, %sw.bb, %entry
+  %retval.0 = phi i8* [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default14 ], [ getelementptr inbounds ([5 x i8]* @.str1717689, i32 0, i32 0), %sw.bb13 ], [ getelementptr inbounds ([6 x i8]* @.str928220, i32 0, i32 0), %sw.bb12 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default ], [ getelementptr inbounds ([21 x i8]* @.str908218, i32 0, i32 0), %sw.bb8 ], [ getelementptr inbounds ([5 x i8]* @.str9210864, i32 0, i32 0), %sw.bb ], [ getelementptr inbounds ([5 x i8]* @.str9210864, i32 0, i32 0), %sw.bb ], [ getelementptr inbounds ([5 x i8]* @.str9210864, i32 0, i32 0), %sw.bb ], [ getelementptr inbounds ([8 x i8]* @.str918219, i32 0, i32 0), %sw.bb9 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %entry ]
+  ret i8* %retval.0
+}
+
+define i32 @main() {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0))
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
+
+attributes #0 = { nounwind readonly }
+
+!llvm.ident = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, 
!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0}
+!llvm.module.flags = !{!1}
+
+!0 = metadata !{metadata !"clang version 3.4 (git@github.com:kripken/emscripten-fastcomp-clang.git 406c991ba0416c838ee097361c27a12411a088b9) (https://chromium.googlesource.com/native_client/pnacl-llvm a5e8942da586a7ef0ed02361b77a3010f16428cf)"}
+!1 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!2 = metadata !{metadata !3, metadata !4, i64 8}
+!3 = metadata !{metadata !"_ZTSN6ZenLib7uint128E", metadata !4, i64 0, metadata !4, i64 8}
+!4 = metadata !{metadata !"long long", metadata !5, i64 0}
+!5 = metadata !{metadata !"omnipotent char", metadata !6, i64 0}
+!6 = metadata !{metadata !"Simple C/C++ TBAA"}
+!7 = metadata !{metadata !3, metadata !4, i64 0}
+

From 1f35dd5faa5cc301cda3382722f68a3013158276 Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Thu, 18 Dec 2014 18:16:01 -0800
Subject: [PATCH 12/15] Fix the catch-all return code for SIMD values.

SIMD values can't simply be coerced from "0", so build a string for an
undef of the appropriate type.
---
 lib/Target/JSBackend/JSBackend.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index 206e956382d..33bec0542e4 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -2365,7 +2365,7 @@ void JSWriter::printFunctionBody(const Function *F) {
     if (!LastCurly) LastCurly = buffer;
     char *FinalReturn = strstr(LastCurly, "return ");
     if (!FinalReturn) {
-      Out << " return " << getCast("0", RT, ASM_NONSPECIFIC) << ";\n";
+      Out << " return " << getCast(getConstant(UndefValue::get(RT)), RT, ASM_NONSPECIFIC) << ";\n";
     }
   }
 }

From 700020cda465f92e758c2a198eb02e06192e4c38 Mon Sep 17 00:00:00 2001
From: Alon Zakai <alonzakai@gmail.com>
Date: Sat, 20 Dec 2014 12:34:16 -0800
Subject: [PATCH 13/15] add missing parens in extra return statements; fixes
 breakage from 1f35dd5faa5cc301cda3382722f68a3013158276

---
 lib/Target/JSBackend/JSBackend.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index 33bec0542e4..8a71ff54143 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -2365,7 +2365,7 @@ void JSWriter::printFunctionBody(const Function *F) {
     if (!LastCurly) LastCurly = buffer;
     char *FinalReturn = strstr(LastCurly, "return ");
     if (!FinalReturn) {
-      Out << " return " << getCast(getConstant(UndefValue::get(RT)), RT, ASM_NONSPECIFIC) << ";\n";
+      Out << " return " << getParenCast(getConstant(UndefValue::get(RT)), RT, ASM_NONSPECIFIC) << ";\n";
     }
   }
 }

From ef0e0d2cefc025103714c321351ca2e36169fce1 Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Fri, 26 Dec 2014 13:26:31 -0800
Subject: [PATCH 14/15] Enable Instcombine combining of vector shuffles

Instcombine is currently very conservative about combining vector
shuffles because it is afraid of pessimizing code translated into
generic form from xmmintrin.h intrinsics. For the current SIMD.js API,
it is more desirable to push JS engine code generators to emit good code
for the shuffles that users are actually doing, and less desirable to
preserve broken-up shuffles in their original form.
---
 lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 1e724106991..179d52f0329 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1027,7 +1027,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 
   // If the result mask is equal to one of the original shuffle masks,
   // or is a splat, do the replacement.
-  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
+  //
+  // XXX EMSCRIPTEN: Add '|| true' so that we always do the replacement.
+  // We're targeting SIMD.js, so there's less of an expectation that a
+  // particular shuffle mask will always map onto a particular instruction on
+  // a particular ISA because we aren't targeting a particular ISA (what the
+  // JS engine does is another story). We may wish to re-evaluate this choice
+  // as we move on to higher-element-count vectors, but especially for now this
+  // is quite desirable.
+  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask ||
+      true)
+  {
     SmallVector<Constant*, 16> Elts;
     Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
     for (unsigned i = 0, e = newMask.size(); i != e; ++i) {

From 0bae9ad47629268bdd2fb79f1190746c7b17142e Mon Sep 17 00:00:00 2001
From: Alon Zakai <alonzakai@gmail.com>
Date: Sun, 4 Jan 2015 10:51:51 -0800
Subject: [PATCH 15/15] 1.28.3

---
 emscripten-version.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/emscripten-version.txt b/emscripten-version.txt
index b7825b8d767..a5096ba657e 100644
--- a/emscripten-version.txt
+++ b/emscripten-version.txt
@@ -1,2 +1,2 @@
-1.28.2
+1.28.3