diff --git a/emscripten-version.txt b/emscripten-version.txt
index b7825b8d767..a5096ba657e 100644
--- a/emscripten-version.txt
+++ b/emscripten-version.txt
@@ -1,2 +1,2 @@
-1.28.2
+1.28.3
 
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 30325f66bf2..19f18df659c 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -298,6 +298,7 @@ void initializeRewritePNaClLibraryCallsPass(PassRegistry&);
 void initializeStripAttributesPass(PassRegistry&);
 void initializeStripMetadataPass(PassRegistry&);
 void initializeExpandI64Pass(PassRegistry&); // XXX EMSCRIPTEN
+void initializeExpandInsertExtractElementPass(PassRegistry&); // XXX EMSCRIPTEN
 void initializeLowerEmExceptionsPass(PassRegistry&); // XXX EMSCRIPTEN
 void initializeLowerEmSetjmpPass(PassRegistry&); // XXX EMSCRIPTEN
 void initializeLowerEmAsyncifyPass(PassRegistry&); // XXX EMSCRIPTEN
diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h
index d3b4e8bc64d..22b95cae18c 100644
--- a/include/llvm/Transforms/NaCl.h
+++ b/include/llvm/Transforms/NaCl.h
@@ -53,6 +53,7 @@ ModulePass *createStripAttributesPass();
 ModulePass *createStripMetadataPass();
 
 ModulePass *createExpandI64Pass(); // XXX EMSCRIPTEN
+ModulePass *createExpandInsertExtractElementPass(); // XXX EMSCRIPTEN
 ModulePass *createLowerEmExceptionsPass(); // XXX EMSCRIPTEN
 ModulePass *createLowerEmSetjmpPass(); // XXX EMSCRIPTEN
 ModulePass *createNoExitRuntimePass(); // XXX EMSCRIPTEN
diff --git a/lib/Target/JSBackend/CMakeLists.txt b/lib/Target/JSBackend/CMakeLists.txt
index ac04b0a2a71..c1194467f4b 100644
--- a/lib/Target/JSBackend/CMakeLists.txt
+++ b/lib/Target/JSBackend/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_llvm_target(JSBackendCodeGen
   AllocaManager.cpp
   ExpandI64.cpp
+  ExpandInsertExtractElement.cpp
   JSBackend.cpp
   JSTargetMachine.cpp
   JSTargetTransformInfo.cpp
diff --git a/lib/Target/JSBackend/CallHandlers.h b/lib/Target/JSBackend/CallHandlers.h
index 0f2cbabf20d..f3991dad1ea 100644
--- a/lib/Target/JSBackend/CallHandlers.h
+++ b/lib/Target/JSBackend/CallHandlers.h
@@ -454,8 +454,7 @@ DEF_CALL_HANDLER(llvm_cttz_i32, {
 
 // vector ops
 DEF_CALL_HANDLER(emscripten_float32x4_signmask, {
-  // TODO: use signMaskPolyfill explicitly for now, until the builtin signMask is ready.
-  return getAssign(CI) + getValueAsStr(CI->getOperand(0)) + ".signMaskPolyfill";
+  return getAssign(CI) + getValueAsStr(CI->getOperand(0)) + ".signMask";
 })
 
 #define DEF_BUILTIN_HANDLER(name, to) \
diff --git a/lib/Target/JSBackend/ExpandI64.cpp b/lib/Target/JSBackend/ExpandI64.cpp
index 168e66063b3..f40ac691dcc 100644
--- a/lib/Target/JSBackend/ExpandI64.cpp
+++ b/lib/Target/JSBackend/ExpandI64.cpp
@@ -815,6 +815,16 @@ bool ExpandI64::splitInst(Instruction *I) {
         Chunks.push_back(L);
         Chunks.push_back(H);
         break;
+      } else if (isa<VectorType>(I->getOperand(0)->getType()) && !isa<VectorType>(I->getType())) {
+          unsigned NumElts = getNumChunks(I->getType());
+          VectorType *IVTy = VectorType::get(i32, NumElts);
+          Instruction *B = CopyDebug(new BitCastInst(I->getOperand(0), IVTy, "", I), I);
+          for (unsigned i = 0; i < NumElts; ++i) {
+              Constant *Idx = ConstantInt::get(i32, i);
+              Instruction *Ext = CopyDebug(ExtractElementInst::Create(B, Idx, "", I), I);
+              Chunks.push_back(Ext);
+          }
+          break;
       } else {
         // no-op bitcast
         assert(I->getType() == I->getOperand(0)->getType());
@@ -1129,6 +1139,7 @@ bool ExpandI64::runOnModule(Module &M) {
         Phi->addIncoming(Phi->getIncomingValue(Index), Change.NewBB);
       }
     }
+    PhiBlockChanges.clear();
 
     // We only visited blocks found by a DFS walk from the entry, so we haven't
     // visited any unreachable blocks, and they may still contain illegal
diff --git a/lib/Target/JSBackend/ExpandInsertExtractElement.cpp b/lib/Target/JSBackend/ExpandInsertExtractElement.cpp
new file mode 100644
index 00000000000..fbc7b8667aa
--- /dev/null
+++ b/lib/Target/JSBackend/ExpandInsertExtractElement.cpp
@@ -0,0 +1,106 @@
+//==- ExpandInsertExtractElement.cpp - Expand vector insert and extract -=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===------------------------------------------------------------------===//
+//
+// This pass expands insertelement and extractelement instructions with
+// variable indices, which SIMD.js doesn't natively support yet.
+//
+//===------------------------------------------------------------------===//
+
+#include "OptPasses.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <map>
+#include <vector>
+
+#include "llvm/Support/raw_ostream.h"
+
+#ifdef NDEBUG
+#undef assert
+#define assert(x) { if (!(x)) report_fatal_error(#x); }
+#endif
+
+using namespace llvm;
+
+namespace {
+  // FunctionPass that expands variable-index insertelement/extractelement.
+  class ExpandInsertExtractElement : public FunctionPass {
+    bool Changed;  // value returned by runOnFunction
+
+  public:
+    static char ID;
+    ExpandInsertExtractElement() : FunctionPass(ID) {
+      initializeExpandInsertExtractElementPass(*PassRegistry::getPassRegistry());
+    }
+    // Expands the matching instructions in F (defined out of line).
+    virtual bool runOnFunction(Function &F);
+  };
+}
+// Definition of the static pass ID, then registration of the pass with LLVM.
+char ExpandInsertExtractElement::ID = 0;
+INITIALIZE_PASS(ExpandInsertExtractElement, "expand-insert-extract-elements",
+                "Expand and lower insert and extract element operations",
+                false, false)
+
+// Utilities
+// Copies Original's debug location onto NewInst and returns NewInst.
+static Instruction *CopyDebug(Instruction *NewInst, Instruction *Original) {
+  NewInst->setDebugLoc(Original->getDebugLoc());
+  return NewInst;
+}
+
+// Rewrites every variable-index insertelement/extractelement in F as a
+// store/GEP/load sequence through a stack slot; returns true if F changed.
+bool ExpandInsertExtractElement::runOnFunction(Function &F) {
+  Changed = false;
+  Type *Int32 = Type::getInt32Ty(F.getContext());
+  Constant *Zero = ConstantInt::get(Int32, 0);
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    Instruction *Inst = &*I++;  // advance first: Inst may be erased below
+    // Re-fetch the insertion point each time: the previous first instruction
+    // of the entry block may itself have been erased by an earlier iteration.
+    Instruction *Entry = F.getEntryBlock().begin();
+    if (InsertElementInst *III = dyn_cast<InsertElementInst>(Inst)) {
+      if (isa<ConstantInt>(III->getOperand(2)))
+          continue;
+      // Spill the vector, overwrite one lane in memory, reload the result.
+      Instruction *A = new AllocaInst(III->getType(), 0, "", Entry);
+      CopyDebug(new StoreInst(III->getOperand(0), A, III), III);
+      Value *Idxs[] = { Zero, III->getOperand(2) };
+      Instruction *B = CopyDebug(GetElementPtrInst::Create(A, Idxs, "", III), III);
+      CopyDebug(new StoreInst(III->getOperand(1), B, III), III);
+      Instruction *L = CopyDebug(new LoadInst(A, "", III), III);
+      III->replaceAllUsesWith(L);
+      III->eraseFromParent();
+      Changed = true;
+    } else if (ExtractElementInst *EII = dyn_cast<ExtractElementInst>(Inst)) {
+      if (isa<ConstantInt>(EII->getOperand(1)))
+          continue;
+      // Spill the vector and load just the requested lane.
+      Instruction *A = new AllocaInst(EII->getOperand(0)->getType(), 0, "", Entry);
+      CopyDebug(new StoreInst(EII->getOperand(0), A, EII), EII);
+      Value *Idxs[] = { Zero, EII->getOperand(1) };
+      Instruction *B = CopyDebug(GetElementPtrInst::Create(A, Idxs, "", EII), EII);
+      Instruction *L = CopyDebug(new LoadInst(B, "", EII), EII);
+      EII->replaceAllUsesWith(L);
+      EII->eraseFromParent();
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+// Factory: returns a newly allocated ExpandInsertExtractElement pass.
+Pass *llvm::createExpandInsertExtractElementPass() {
+  return new ExpandInsertExtractElement();
+}
diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index 4b8da31df04..8a71ff54143 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -386,7 +386,8 @@ namespace {
       // of the compare that produced them.
       assert(VT->getElementType()->getPrimitiveSizeInBits() == 32 ||
              VT->getElementType()->getPrimitiveSizeInBits() == 1);
-      assert(VT->getNumElements() == 4);
+      assert(VT->getBitWidth() <= 128);
+      assert(VT->getNumElements() <= 4);
       UsesSIMD = true;
     }
 
@@ -455,6 +456,7 @@ namespace {
     void generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw_string_ostream& Code);
     void generateICmpExpression(const ICmpInst *I, raw_string_ostream& Code);
     void generateFCmpExpression(const FCmpInst *I, raw_string_ostream& Code);
+    void generateShiftExpression(const BinaryOperator *I, raw_string_ostream& Code);
     void generateUnrolledExpression(const User *I, raw_string_ostream& Code);
     bool generateSIMDExpression(const User *I, raw_string_ostream& Code);
     void generateExpression(const User *I, raw_string_ostream& Code);
@@ -1073,18 +1075,23 @@ std::string JSWriter::getConstant(const Constant* CV, AsmCast sign) {
       return "0";
     }
   } else if (const ConstantDataVector *DV = dyn_cast<ConstantDataVector>(CV)) {
-    return getConstantVector(cast<VectorType>(CV->getType())->getElementType(),
-                             getConstant(DV->getElementAsConstant(0)),
-                             getConstant(DV->getElementAsConstant(1)),
-                             getConstant(DV->getElementAsConstant(2)),
-                             getConstant(DV->getElementAsConstant(3)));
+    unsigned NumElts = cast<VectorType>(DV->getType())->getNumElements();
+    Type *EltTy = cast<VectorType>(DV->getType())->getElementType();
+    Constant *Undef = UndefValue::get(EltTy);
+    return getConstantVector(EltTy,
+                             getConstant(NumElts > 0 ? DV->getElementAsConstant(0) : Undef),
+                             getConstant(NumElts > 1 ? DV->getElementAsConstant(1) : Undef),
+                             getConstant(NumElts > 2 ? DV->getElementAsConstant(2) : Undef),
+                             getConstant(NumElts > 3 ? DV->getElementAsConstant(3) : Undef));
   } else if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) {
-    assert(V->getNumOperands() == 4);
+    unsigned NumElts = cast<VectorType>(CV->getType())->getNumElements();
+    Type *EltTy = cast<VectorType>(CV->getType())->getElementType();
+    Constant *Undef = UndefValue::get(EltTy);
     return getConstantVector(cast<VectorType>(V->getType())->getElementType(),
-                             getConstant(V->getOperand(0)),
-                             getConstant(V->getOperand(1)),
-                             getConstant(V->getOperand(2)),
-                             getConstant(V->getOperand(3)));
+                             getConstant(NumElts > 0 ? V->getOperand(0) : Undef),
+                             getConstant(NumElts > 1 ? V->getOperand(1) : Undef),
+                             getConstant(NumElts > 2 ? V->getOperand(2) : Undef),
+                             getConstant(NumElts > 3 ? V->getOperand(3) : Undef));
   } else if (const ConstantArray *CA = dyn_cast<const ConstantArray>(CV)) {
     // handle things like [i8* bitcast (<{ i32, i32, i32 }>* @_ZTISt9bad_alloc to i8*)] which clang can emit for landingpads
     assert(CA->getNumOperands() == 1);
@@ -1299,20 +1306,22 @@ void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw
   // Check whether can generate SIMD.js swizzle or shuffle.
   std::string A = getValueAsStr(SVI->getOperand(0));
   std::string B = getValueAsStr(SVI->getOperand(1));
-  int Mask0 = SVI->getMaskValue(0);
-  int Mask1 = SVI->getMaskValue(1);
-  int Mask2 = SVI->getMaskValue(2);
-  int Mask3 = SVI->getMaskValue(3);
+  int OpNumElements = cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+  int ResultNumElements = SVI->getType()->getNumElements();
+  int Mask0 = ResultNumElements > 0 ? SVI->getMaskValue(0) : -1;
+  int Mask1 = ResultNumElements > 1 ? SVI->getMaskValue(1) : -1;
+  int Mask2 = ResultNumElements > 2 ? SVI->getMaskValue(2) : -1;
+  int Mask3 = ResultNumElements > 3 ? SVI->getMaskValue(3) : -1;
   bool swizzleA = false;
   bool swizzleB = false;
-  if ((Mask0 < 4) && (Mask1 < 4) &&
-      (Mask2 < 4) && (Mask3 < 4)) {
+  if ((Mask0 < OpNumElements) && (Mask1 < OpNumElements) &&
+      (Mask2 < OpNumElements) && (Mask3 < OpNumElements)) {
     swizzleA = true;
   }
-  if ((Mask0 < 0 || (Mask0 >= 4 && Mask0 < 8)) &&
-      (Mask1 < 0 || (Mask1 >= 4 && Mask1 < 8)) &&
-      (Mask2 < 0 || (Mask2 >= 4 && Mask2 < 8)) &&
-      (Mask3 < 0 || (Mask3 >= 4 && Mask3 < 8))) {
+  if ((Mask0 < 0 || (Mask0 >= OpNumElements && Mask0 < OpNumElements * 2)) &&
+      (Mask1 < 0 || (Mask1 >= OpNumElements && Mask1 < OpNumElements * 2)) &&
+      (Mask2 < 0 || (Mask2 >= OpNumElements && Mask2 < OpNumElements * 2)) &&
+      (Mask3 < 0 || (Mask3 >= OpNumElements && Mask3 < OpNumElements * 2))) {
     swizzleB = true;
   }
   assert(!(swizzleA && swizzleB));
@@ -1323,18 +1332,22 @@ void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw
     } else {
       Code << "SIMD_float32x4_swizzle(" << T;
     }
-    for (unsigned int i = 0; i < 4; i++) {
+    int i = 0;
+    for (; i < ResultNumElements; ++i) {
       Code << ", ";
       int Mask = SVI->getMaskValue(i);
       if (Mask < 0) {
         Code << 0;
-      } else if (Mask < 4) {
+      } else if (Mask < OpNumElements) {
         Code << Mask;
       } else {
-        assert(Mask < 8);
-        Code << (Mask-4);
+        assert(Mask < OpNumElements * 2);
+        Code << (Mask-OpNumElements);
       }
     }
+    for (; i < 4; ++i) {
+      Code << ", 0";
+    }
     Code << ")";
     return;
   }
@@ -1353,7 +1366,13 @@ void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw
   for (unsigned int i = 0; i < Indices.size(); ++i) {
     if (i != 0)
       Code << ", ";
-    Code << Indices[i];
+    int Mask = Indices[i];
+    if (Mask >= OpNumElements)
+      Mask = Mask - OpNumElements + 4;
+    if (Mask < 0)
+      Code << 0;
+    else
+      Code << Mask;
   }
 
   Code << ")";
@@ -1449,9 +1468,61 @@ void JSWriter::generateFCmpExpression(const FCmpInst *I, raw_string_ostream& Cod
     Code << ")";
 }
 
+// Returns the scalar last inserted into lane i of insertelement chain V, or NULL if unknown.
+static const Value *getElement(const Value *V, unsigned i) {
+    while (const InsertElementInst *II = dyn_cast<InsertElementInst>(V)) {
+        const ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(2));
+        if (!CI) return NULL; // a variable index may have overwritten lane i
+        if (CI->equalsInt(i)) return II->getOperand(1);
+        V = II->getOperand(0); // lane i untouched here; look at the older vector
+    }
+    return NULL; // chain ended at something other than an insertelement
+}
+
+// Returns the one value held by every lane of vector V, or NULL when the lanes
+// differ or getElement cannot resolve one of them.
+static const Value *getSplatValue(const Value *V) {
+    if (const Constant *C = dyn_cast<Constant>(V))
+        return C->getSplatValue();
+
+    const unsigned NumLanes = cast<VectorType>(V->getType())->getNumElements();
+    const Value *Common = NULL;
+    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+        const Value *Elt = getElement(V, Lane);
+        // Bail out on an unresolved lane or on the first mismatch.
+        if (!Elt || (Common && Common != Elt))
+            return NULL;
+        Common = Elt;
+    }
+    // Every inspected lane produced the same value.
+    return Common;
+}
+
+// Emits JS for a vector shl/lshr/ashr: a SIMD.js ByScalar shift when every
+// lane is shifted by the same amount (a splat), otherwise unrolled code.
+void JSWriter::generateShiftExpression(const BinaryOperator *I, raw_string_ostream& Code) {
+    const Value *Splat = getSplatValue(I->getOperand(1));
+    if (!Splat) {
+        // SIMD.js does not currently have vector-vector shifts.
+        generateUnrolledExpression(I, Code);
+        return;
+    }
+
+    const char *Op;
+    switch (I->getOpcode()) {
+        case Instruction::AShr: Op = "shiftRightArithmeticByScalar"; break;
+        case Instruction::LShr: Op = "shiftRightLogicalByScalar"; break;
+        default:                Op = "shiftLeftByScalar"; break;
+    }
+    Code << getAssignIfNeeded(I) << "SIMD_int32x4_" << Op;
+    Code << "(" << getValueAsStr(I->getOperand(0)) << ", " << getValueAsStr(Splat) << ")";
+}
+
 void JSWriter::generateUnrolledExpression(const User *I, raw_string_ostream& Code) {
   VectorType *VT = cast<VectorType>(I->getType());
 
+  Code << getAssignIfNeeded(I);
+
   if (VT->getElementType()->isIntegerTy()) {
     Code << "SIMD_int32x4(";
   } else {
@@ -1481,6 +1552,18 @@ void JSWriter::generateUnrolledExpression(const User *I, raw_string_ostream& Cod
         Code << "(" << getValueAsStr(I->getOperand(0)) << Lane << ">>>0) / ("
              << getValueAsStr(I->getOperand(1)) << Lane << ">>>0)>>>0";
         break;
+      case Instruction::AShr:
+        Code << "(" << getValueAsStr(I->getOperand(0)) << Lane << "|0) >> ("
+             << getValueAsStr(I->getOperand(1)) << Lane << "|0)|0";
+        break;
+      case Instruction::LShr:
+        Code << "(" << getValueAsStr(I->getOperand(0)) << Lane << "|0) >>> ("
+             << getValueAsStr(I->getOperand(1)) << Lane << "|0)|0";
+        break;
+      case Instruction::Shl:
+        Code << "(" << getValueAsStr(I->getOperand(0)) << Lane << "|0) << ("
+             << getValueAsStr(I->getOperand(1)) << Lane << "|0)|0";
+        break;
       default: I->dump(); error("invalid unrolled vector instr"); break;
     }
   }
@@ -1514,8 +1597,17 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
         Code << getAssignIfNeeded(I) << getValueAsStr(I->getOperand(0));
         break;
       case Instruction::Select:
-        assert(I->getOperand(0)->getType()->isIntegerTy(1) && "vector-of-i1 select not yet supported");
-        // select arms are SIMD values, no special handling
+        // Since we represent vectors of i1 as vectors of sign extended wider integers,
+        // selecting on them is just an elementwise select.
+        if (isa<VectorType>(I->getOperand(0)->getType())) {
+          if (cast<VectorType>(I->getType())->getElementType()->isIntegerTy()) {
+            Code << getAssignIfNeeded(I) << "SIMD_int32x4_select(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << "," << getValueAsStr(I->getOperand(2)) << ")"; break;
+          } else {
+            Code << getAssignIfNeeded(I) << "SIMD_float32x4_select(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << "," << getValueAsStr(I->getOperand(2)) << ")"; break;
+          }
+          return true;
+        }
+        // Otherwise we have a scalar condition, so it's a ?: operator.
         return false;
       case Instruction::FAdd: Code << getAssignIfNeeded(I) << "SIMD_float32x4_add(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break;
       case Instruction::FMul: Code << getAssignIfNeeded(I) << "SIMD_float32x4_mul(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break;
@@ -1556,11 +1648,20 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
         const LoadInst *LI = cast<LoadInst>(I);
         const Value *P = LI->getPointerOperand();
         std::string PS = getValueAsStr(P);
+
+        // Determine if this is a partial load.
+        static const std::string partialAccess[4] = { "X", "XY", "XYZ", "" };
+        if (VT->getNumElements() < 1 || VT->getNumElements() > 4) {
+          error("invalid number of lanes in SIMD operation!");
+          break;
+        }
+        const std::string &Part = partialAccess[VT->getNumElements() - 1];
+
         Code << getAssignIfNeeded(I);
         if (VT->getElementType()->isIntegerTy()) {
-          Code << "SIMD_int32x4_load(HEAPU8, " << PS << ")";
+          Code << "SIMD_int32x4_load" << Part << "(HEAPU8, " << PS << ")";
         } else {
-          Code << "SIMD_float32x4_load(HEAPU8, " << PS << ")";
+          Code << "SIMD_float32x4_load" << Part << "(HEAPU8, " << PS << ")";
         }
         break;
       }
@@ -1580,6 +1681,11 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
         // doesn't support them).
         generateUnrolledExpression(I, Code);
         break;
+      case Instruction::AShr:
+      case Instruction::LShr:
+      case Instruction::Shl:
+        generateShiftExpression(cast<BinaryOperator>(I), Code);
+        break;
     }
     return true;
   } else {
@@ -1591,10 +1697,19 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
       std::string PS = getOpName(P);
       std::string VS = getValueAsStr(SI->getValueOperand());
       Code << getAdHocAssign(PS, P->getType()) << getValueAsStr(P) << ';';
+
+      // Determine if this is a partial store.
+      static const std::string partialAccess[4] = { "X", "XY", "XYZ", "" };
+      if (VT->getNumElements() < 1 || VT->getNumElements() > 4) {
+        error("invalid number of lanes in SIMD operation!");
+        return false;
+      }
+      const std::string &Part = partialAccess[VT->getNumElements() - 1];
+
       if (VT->getElementType()->isIntegerTy()) {
-        Code << "SIMD_int32x4_store(HEAPU8, " << PS << ", " << VS << ")";
+        Code << "SIMD_int32x4_store" << Part << "(HEAPU8, " << PS << ", " << VS << ")";
       } else {
-        Code << "SIMD_float32x4_store(HEAPU8, " << PS << ", " << VS << ")";
+        Code << "SIMD_float32x4_store" << Part << "(HEAPU8, " << PS << ", " << VS << ")";
       }
       return true;
     } else if (Operator::getOpcode(I) == Instruction::ExtractElement) {
@@ -2250,7 +2365,7 @@ void JSWriter::printFunctionBody(const Function *F) {
     if (!LastCurly) LastCurly = buffer;
     char *FinalReturn = strstr(LastCurly, "return ");
     if (!FinalReturn) {
-      Out << " return " << getCast("0", RT, ASM_NONSPECIFIC) << ";\n";
+      Out << " return " << getParenCast(getConstant(UndefValue::get(RT)), RT, ASM_NONSPECIFIC) << ";\n";
     }
   }
 }
@@ -2789,6 +2904,7 @@ bool JSTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                           AnalysisID StopAfter) {
   assert(FileType == TargetMachine::CGFT_AssemblyFile);
 
+  PM.add(createExpandInsertExtractElementPass());
   PM.add(createExpandI64Pass());
 
   CodeGenOpt::Level OptLevel = getOptLevel();
diff --git a/lib/Target/JSBackend/JSTargetTransformInfo.cpp b/lib/Target/JSBackend/JSTargetTransformInfo.cpp
index 74c2201d54e..dcb92b4d643 100644
--- a/lib/Target/JSBackend/JSTargetTransformInfo.cpp
+++ b/lib/Target/JSBackend/JSTargetTransformInfo.cpp
@@ -68,6 +68,15 @@ class JSTTI : public ImmutablePass, public TargetTransformInfo {
   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
 
   virtual unsigned getRegisterBitWidth(bool Vector) const;
+
+  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                          OperandValueKind Opd1Info = OK_AnyValue,
+                                          OperandValueKind Opd2Info = OK_AnyValue) const;
+
+  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+                                      unsigned Index = -1) const;
+
+  virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
 };
 
 } // end anonymous namespace
@@ -101,3 +110,56 @@ unsigned JSTTI::getRegisterBitWidth(bool Vector) const {
 
   return 32;
 }
+
+// Reports the cost of arithmetic opcode Opcode on type Ty. Vector types other
+// than 4-lane i1/i32/float get the sentinel cost 65536; vector shifts whose
+// amount is not flagged uniform are costed per lane plus a penalty of 100.
+unsigned JSTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       OperandValueKind Opd1Info,
+                                       OperandValueKind Opd2Info) const {
+  const unsigned Nope = 65536;
+
+  unsigned Cost = TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info);
+
+  VectorType *VTy = dyn_cast<VectorType>(Ty);
+  if (!VTy)
+    return Cost;
+
+  // Wait until the other lane counts are optimized.
+  if (VTy->getNumElements() != 4)
+    return Nope;
+
+  // SIMD.js supports int32x4 and float32x4, and we can emulate <4 x i1>.
+  Type *EltTy = VTy->getElementType();
+  const bool Supported = EltTy->isIntegerTy(1) || EltTy->isIntegerTy(32) ||
+                         EltTy->isFloatTy();
+  if (!Supported)
+    return Nope;
+
+  const bool IsShift = Opcode == Instruction::LShr ||
+                       Opcode == Instruction::AShr ||
+                       Opcode == Instruction::Shl;
+  const bool UniformAmount = Opd2Info == OK_UniformValue ||
+                             Opd2Info == OK_UniformConstantValue;
+  // SIMD.js' shifts are currently only ByScalar.
+  if (IsShift && !UniformAmount)
+    Cost = Cost * VTy->getNumElements() + 100;
+
+  return Cost;
+}
+
+// Adds a fixed penalty of 100 to the inherited cost when Index is -1u, the
+// default given in the class declaration.
+unsigned JSTTI::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const {
+  const unsigned BaseCost = TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+
+  // SIMD.js' insert/extract currently only take constant indices.
+  const bool VariableIndex = (Index == -1u);
+  return VariableIndex ? BaseCost + 100 : BaseCost;
+}
+
+// Turns off both the Partial and the Runtime unrolling preference in UP.
+void JSTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
+  // We generally don't want a lot of unrolling.
+  UP.Runtime = UP.Partial = false;
+}
diff --git a/lib/Target/JSBackend/OptPasses.h b/lib/Target/JSBackend/OptPasses.h
index 2f90b568b01..5e236569f9e 100644
--- a/lib/Target/JSBackend/OptPasses.h
+++ b/lib/Target/JSBackend/OptPasses.h
@@ -17,6 +17,7 @@ namespace llvm {
   extern FunctionPass *createSimplifyAllocasPass();
 
   extern Pass *createExpandI64Pass();
+  extern Pass *createExpandInsertExtractElementPass();
 
 } // End llvm namespace
 
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 1e724106991..179d52f0329 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1027,7 +1027,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 
   // If the result mask is equal to one of the original shuffle masks,
   // or is a splat, do the replacement.
-  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
+  //
+  // XXX EMSCRIPTEN: Add '|| true' so that we always do the replacement.
+  // We're targeting SIMD.js, so there's less of an expectation that a
+  // particular shuffle mask will always map onto a particular instruction on
+  // a particular ISA because we aren't targeting a particular ISA (what the
+  // JS engine does is another story). We may wish to re-evaluate this choice
+  // as we move on to higher-element-count vectors, but especially for now this
+  // is quite desirable.
+  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask ||
+      true)
+  {
     SmallVector<Constant*, 16> Elts;
     Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
     for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
diff --git a/test/CodeGen/JS/blockchanges.ll b/test/CodeGen/JS/blockchanges.ll
new file mode 100644
index 00000000000..b93e6688c40
--- /dev/null
+++ b/test/CodeGen/JS/blockchanges.ll
@@ -0,0 +1,411 @@
+; RUN: llc < %s
+
+; regression check for emscripten #3088 - we were not clearing BlockChanges in i64 lowering
+
+; ModuleID = 'waka.bc'
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+
+%"class.ZenLib::uint128" = type <{ i64, i64 }>
+
+@.str = private unnamed_addr constant [15 x i8] c"hello, world!\0A\00", align 1
+
+@.str368164 = external hidden unnamed_addr constant [10 x i8], align 1
+@.str398167 = external hidden unnamed_addr constant [6 x i8], align 1
+@.str718199 = external hidden unnamed_addr constant [9 x i8], align 1
+@.str738201 = external hidden unnamed_addr constant [21 x i8], align 1
+@.str748202 = external hidden unnamed_addr constant [26 x i8], align 1
+@.str758203 = external hidden unnamed_addr constant [21 x i8], align 1
+@.str768204 = external hidden unnamed_addr constant [8 x i8], align 1
+@.str778205 = external hidden unnamed_addr constant [14 x i8], align 1
+@.str788206 = external hidden unnamed_addr constant [22 x i8], align 1
+@.str798207 = external hidden unnamed_addr constant [25 x i8], align 1
+@.str808208 = external hidden unnamed_addr constant [24 x i8], align 1
+@.str818209 = external hidden unnamed_addr constant [20 x i8], align 1
+@.str828210 = external hidden unnamed_addr constant [34 x i8], align 1
+@.str838211 = external hidden unnamed_addr constant [31 x i8], align 1
+@.str848212 = external hidden unnamed_addr constant [29 x i8], align 1
+@.str858213 = external hidden unnamed_addr constant [44 x i8], align 1
+@.str868214 = external hidden unnamed_addr constant [12 x i8], align 1
+@.str908218 = external hidden unnamed_addr constant [21 x i8], align 1
+@.str918219 = external hidden unnamed_addr constant [8 x i8], align 1
+@.str928220 = external hidden unnamed_addr constant [6 x i8], align 1
+@.str9210864 = external hidden unnamed_addr constant [5 x i8], align 1
+@.str514367 = external hidden unnamed_addr constant [5 x i8], align 1
+@.str214409 = external hidden unnamed_addr constant [4 x i8], align 1
+@.str20216493 = external hidden unnamed_addr constant [3 x i8], align 1
+@.str2017231 = external hidden unnamed_addr constant [11 x i8], align 1
+@.str2317234 = external hidden unnamed_addr constant [14 x i8], align 1
+@.str2417235 = external hidden unnamed_addr constant [4 x i8], align 1
+@.str2717238 = external hidden unnamed_addr constant [5 x i8], align 1
+@.str3217243 = external hidden unnamed_addr constant [4 x i8], align 1
+@.str1717689 = external hidden unnamed_addr constant [5 x i8], align 1
+@.str2104 = external hidden unnamed_addr constant [1 x i8], align 1
+
+; Function Attrs: nounwind readonly
+define hidden i8* @_ZN12MediaInfoLib22Mxf_EssenceCompressionEN6ZenLib7uint128E(%"class.ZenLib::uint128"* nocapture readonly %EssenceCompression) #0 {
+entry:
+  %hi = getelementptr inbounds %"class.ZenLib::uint128"* %EssenceCompression, i32 0, i32 1
+  %0 = load i64* %hi, align 1, !tbaa !2
+  %and = and i64 %0, -256
+  %cmp = icmp eq i64 %and, 436333716306985216
+  br i1 %cmp, label %lor.lhs.false, label %return
+
+lor.lhs.false:                                    ; preds = %entry
+  %lo = getelementptr inbounds %"class.ZenLib::uint128"* %EssenceCompression, i32 0, i32 0
+  %1 = load i64* %lo, align 1, !tbaa !7
+  %and1 = and i64 %1, -72057594037927936
+  switch i64 %and1, label %return [
+    i64 288230376151711744, label %if.end
+    i64 1008806316530991104, label %if.end
+  ]
+
+if.end:                                           ; preds = %lor.lhs.false, %lor.lhs.false
+  %shr = lshr i64 %1, 56
+  %conv = trunc i64 %shr to i32
+  %and10 = lshr i64 %1, 48
+  %and14 = lshr i64 %1, 40
+  %and18 = lshr i64 %1, 32
+  %conv20 = trunc i64 %and18 to i32
+  %and22 = lshr i64 %1, 24
+  %and26 = lshr i64 %1, 16
+  %conv28 = trunc i64 %and26 to i32
+  %and30 = lshr i64 %1, 8
+  %conv32 = trunc i64 %and30 to i32
+  switch i32 %conv, label %return [
+    i32 4, label %sw.bb
+    i32 14, label %sw.bb112
+  ]
+
+sw.bb:                                            ; preds = %if.end
+  %conv12 = trunc i64 %and10 to i32
+  %conv34 = and i32 %conv12, 255
+  switch i32 %conv34, label %return [
+    i32 1, label %sw.bb35
+    i32 2, label %sw.bb64
+  ]
+
+sw.bb35:                                          ; preds = %sw.bb
+  %conv36 = and i64 %and14, 255
+  %cond12 = icmp eq i64 %conv36, 2
+  br i1 %cond12, label %sw.bb37, label %return
+
+sw.bb37:                                          ; preds = %sw.bb35
+  %conv38 = and i32 %conv20, 255
+  switch i32 %conv38, label %return [
+    i32 1, label %sw.bb39
+    i32 2, label %sw.bb42
+  ]
+
+sw.bb39:                                          ; preds = %sw.bb37
+  %conv40 = and i64 %and22, 255
+  %cond14 = icmp eq i64 %conv40, 1
+  %. = select i1 %cond14, i8* getelementptr inbounds ([4 x i8]* @.str214409, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0)
+  br label %return
+
+sw.bb42:                                          ; preds = %sw.bb37
+  %2 = trunc i64 %and22 to i32
+  %conv43 = and i32 %2, 255
+  switch i32 %conv43, label %sw.default61 [
+    i32 1, label %sw.bb44
+    i32 2, label %return
+    i32 3, label %sw.bb56
+    i32 113, label %sw.bb60
+  ]
+
+sw.bb44:                                          ; preds = %sw.bb42
+  %conv45 = and i32 %conv28, 255
+  switch i32 %conv45, label %sw.default54 [
+    i32 0, label %return
+    i32 1, label %return
+    i32 2, label %return
+    i32 3, label %return
+    i32 4, label %return
+    i32 17, label %return
+    i32 32, label %sw.bb52
+    i32 48, label %sw.bb53
+    i32 49, label %sw.bb53
+    i32 50, label %sw.bb53
+    i32 51, label %sw.bb53
+    i32 52, label %sw.bb53
+    i32 53, label %sw.bb53
+    i32 54, label %sw.bb53
+    i32 55, label %sw.bb53
+    i32 56, label %sw.bb53
+    i32 57, label %sw.bb53
+    i32 58, label %sw.bb53
+    i32 59, label %sw.bb53
+    i32 60, label %sw.bb53
+    i32 61, label %sw.bb53
+    i32 62, label %sw.bb53
+    i32 63, label %sw.bb53
+  ]
+
+sw.bb52:                                          ; preds = %sw.bb44
+  br label %return
+
+sw.bb53:                                          ; preds = %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44
+  br label %return
+
+sw.default54:                                     ; preds = %sw.bb44
+  br label %return
+
+sw.bb56:                                          ; preds = %sw.bb42
+  %conv57 = and i64 %and26, 255
+  %cond13 = icmp eq i64 %conv57, 1
+  %.35 = select i1 %cond13, i8* getelementptr inbounds ([10 x i8]* @.str368164, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0)
+  br label %return
+
+sw.bb60:                                          ; preds = %sw.bb42
+  br label %return
+
+sw.default61:                                     ; preds = %sw.bb42
+  br label %return
+
+sw.bb64:                                          ; preds = %sw.bb
+  %conv65 = and i64 %and14, 255
+  %cond9 = icmp eq i64 %conv65, 2
+  br i1 %cond9, label %sw.bb66, label %return
+
+sw.bb66:                                          ; preds = %sw.bb64
+  %conv67 = and i32 %conv20, 255
+  switch i32 %conv67, label %return [
+    i32 1, label %sw.bb68
+    i32 2, label %sw.bb75
+  ]
+
+sw.bb68:                                          ; preds = %sw.bb66
+  %3 = trunc i64 %and22 to i32
+  %conv69 = and i32 %3, 255
+  switch i32 %conv69, label %sw.default74 [
+    i32 0, label %return
+    i32 1, label %return
+    i32 126, label %return
+    i32 127, label %return
+  ]
+
+sw.default74:                                     ; preds = %sw.bb68
+  br label %return
+
+sw.bb75:                                          ; preds = %sw.bb66
+  %conv76 = and i64 %and22, 255
+  %cond10 = icmp eq i64 %conv76, 3
+  br i1 %cond10, label %sw.bb77, label %return
+
+sw.bb77:                                          ; preds = %sw.bb75
+  %conv78 = and i32 %conv28, 255
+  switch i32 %conv78, label %return [
+    i32 1, label %sw.bb79
+    i32 2, label %sw.bb84
+    i32 3, label %sw.bb92
+    i32 4, label %sw.bb96
+  ]
+
+sw.bb79:                                          ; preds = %sw.bb77
+  %conv80 = and i32 %conv32, 255
+  switch i32 %conv80, label %sw.default83 [
+    i32 1, label %return
+    i32 16, label %sw.bb82
+  ]
+
+sw.bb82:                                          ; preds = %sw.bb79
+  br label %return
+
+sw.default83:                                     ; preds = %sw.bb79
+  br label %return
+
+sw.bb84:                                          ; preds = %sw.bb77
+  %conv85 = and i32 %conv32, 255
+  switch i32 %conv85, label %sw.default91 [
+    i32 1, label %return
+    i32 4, label %sw.bb87
+    i32 5, label %sw.bb88
+    i32 6, label %sw.bb89
+    i32 28, label %sw.bb90
+  ]
+
+sw.bb87:                                          ; preds = %sw.bb84
+  br label %return
+
+sw.bb88:                                          ; preds = %sw.bb84
+  br label %return
+
+sw.bb89:                                          ; preds = %sw.bb84
+  br label %return
+
+sw.bb90:                                          ; preds = %sw.bb84
+  br label %return
+
+sw.default91:                                     ; preds = %sw.bb84
+  br label %return
+
+sw.bb92:                                          ; preds = %sw.bb77
+  %conv93 = and i64 %and30, 255
+  %cond11 = icmp eq i64 %conv93, 1
+  %.36 = select i1 %cond11, i8* getelementptr inbounds ([14 x i8]* @.str778205, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0)
+  br label %return
+
+sw.bb96:                                          ; preds = %sw.bb77
+  %conv97 = and i32 %conv32, 255
+  switch i32 %conv97, label %sw.default106 [
+    i32 1, label %return
+    i32 2, label %sw.bb99
+    i32 3, label %sw.bb100
+    i32 4, label %sw.bb101
+    i32 5, label %sw.bb102
+    i32 6, label %sw.bb103
+    i32 7, label %sw.bb104
+    i32 8, label %sw.bb105
+  ]
+
+sw.bb99:                                          ; preds = %sw.bb96
+  br label %return
+
+sw.bb100:                                         ; preds = %sw.bb96
+  br label %return
+
+sw.bb101:                                         ; preds = %sw.bb96
+  br label %return
+
+sw.bb102:                                         ; preds = %sw.bb96
+  br label %return
+
+sw.bb103:                                         ; preds = %sw.bb96
+  br label %return
+
+sw.bb104:                                         ; preds = %sw.bb96
+  br label %return
+
+sw.bb105:                                         ; preds = %sw.bb96
+  br label %return
+
+sw.default106:                                    ; preds = %sw.bb96
+  br label %return
+
+sw.bb112:                                         ; preds = %if.end
+  %4 = trunc i64 %and10 to i32
+  %conv113 = and i32 %4, 255
+  switch i32 %conv113, label %return [
+    i32 4, label %sw.bb114
+    i32 6, label %sw.bb127
+  ]
+
+sw.bb114:                                         ; preds = %sw.bb112
+  %conv115 = and i64 %and14, 255
+  %cond5 = icmp eq i64 %conv115, 2
+  %conv117 = and i64 %and18, 255
+  %cond6 = icmp eq i64 %conv117, 1
+  %or.cond = and i1 %cond5, %cond6
+  %conv119 = and i64 %and22, 255
+  %cond7 = icmp eq i64 %conv119, 2
+  %or.cond39 = and i1 %or.cond, %cond7
+  br i1 %or.cond39, label %sw.bb120, label %return
+
+sw.bb120:                                         ; preds = %sw.bb114
+  %conv121 = and i64 %and26, 255
+  %cond8 = icmp eq i64 %conv121, 4
+  %.37 = select i1 %cond8, i8* getelementptr inbounds ([5 x i8]* @.str514367, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0)
+  br label %return
+
+sw.bb127:                                         ; preds = %sw.bb112
+  %conv128 = and i64 %and14, 255
+  %cond = icmp eq i64 %conv128, 4
+  %conv130 = and i64 %and18, 255
+  %cond1 = icmp eq i64 %conv130, 1
+  %or.cond40 = and i1 %cond, %cond1
+  %conv132 = and i64 %and22, 255
+  %cond2 = icmp eq i64 %conv132, 2
+  %or.cond41 = and i1 %or.cond40, %cond2
+  %conv134 = and i64 %and26, 255
+  %cond3 = icmp eq i64 %conv134, 4
+  %or.cond42 = and i1 %or.cond41, %cond3
+  br i1 %or.cond42, label %sw.bb135, label %return
+
+sw.bb135:                                         ; preds = %sw.bb127
+  %conv136 = and i64 %and30, 255
+  %cond4 = icmp eq i64 %conv136, 2
+  %.38 = select i1 %cond4, i8* getelementptr inbounds ([12 x i8]* @.str868214, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0)
+  br label %return
+
+return:                                           ; preds = %sw.bb135, %sw.bb127, %sw.bb120, %sw.bb114, %sw.bb112, %sw.default106, %sw.bb105, %sw.bb104, %sw.bb103, %sw.bb102, %sw.bb101, %sw.bb100, %sw.bb99, %sw.bb96, %sw.bb92, %sw.default91, %sw.bb90, %sw.bb89, %sw.bb88, %sw.bb87, %sw.bb84, %sw.default83, %sw.bb82, %sw.bb79, %sw.bb77, %sw.bb75, %sw.default74, %sw.bb68, %sw.bb68, %sw.bb68, %sw.bb68, %sw.bb66, %sw.bb64, %sw.default61, %sw.bb60, %sw.bb56, %sw.default54, %sw.bb53, %sw.bb52, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb42, %sw.bb39, %sw.bb37, %sw.bb35, %sw.bb, %if.end, %lor.lhs.false, %entry
+  %retval.0 = phi i8* [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default106 ], [ getelementptr inbounds ([44 x i8]* @.str858213, i32 0, i32 0), %sw.bb105 ], [ getelementptr inbounds ([29 x i8]* @.str848212, i32 0, i32 0), %sw.bb104 ], [ getelementptr inbounds ([31 x i8]* @.str838211, i32 0, i32 0), %sw.bb103 ], [ getelementptr inbounds ([34 x i8]* @.str828210, i32 0, i32 0), %sw.bb102 ], [ getelementptr inbounds ([20 x i8]* @.str818209, i32 0, i32 0), %sw.bb101 ], [ getelementptr inbounds ([24 x i8]* @.str808208, i32 0, i32 0), %sw.bb100 ], [ getelementptr inbounds ([25 x i8]* @.str798207, i32 0, i32 0), %sw.bb99 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default91 ], [ getelementptr inbounds ([8 x i8]* @.str768204, i32 0, i32 0), %sw.bb90 ], [ getelementptr inbounds ([21 x i8]* @.str758203, i32 0, i32 0), %sw.bb89 ], [ getelementptr inbounds ([26 x i8]* @.str748202, i32 0, i32 0), %sw.bb88 ], [ getelementptr inbounds ([21 x i8]* @.str738201, i32 0, i32 0), %sw.bb87 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default83 ], [ getelementptr inbounds ([9 x i8]* @.str718199, i32 0, i32 0), %sw.bb82 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default74 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default61 ], [ getelementptr inbounds ([5 x i8]* @.str514367, i32 0, i32 0), %sw.bb60 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default54 ], [ getelementptr inbounds ([4 x i8]* @.str2417235, i32 0, i32 0), %sw.bb53 ], [ getelementptr inbounds ([14 x i8]* @.str2317234, i32 0, i32 0), %sw.bb52 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %lor.lhs.false ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %entry ], [ %., %sw.bb39 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr 
inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([3 x i8]* @.str20216493, i32 0, i32 0), %sw.bb42 ], [ %.35, %sw.bb56 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb37 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb35 ], [ getelementptr inbounds ([4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([6 x i8]* @.str398167, i32 0, i32 0), %sw.bb79 ], [ getelementptr inbounds ([5 x i8]* @.str2717238, i32 0, i32 0), %sw.bb84 ], [ %.36, %sw.bb92 ], [ getelementptr inbounds ([22 x i8]* @.str788206, i32 0, i32 0), %sw.bb96 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb77 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb75 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb66 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb64 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb ], [ %.37, %sw.bb120 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb114 ], [ %.38, %sw.bb135 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb127 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb112 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %if.end ]
+  ret i8* %retval.0
+}
+; Returns one of the external string constants, chosen by switching on two byte fields (bits 32-39, then bits 24-31) of the i64 loaded from field 0 of %DataDefinition.
+; Function Attrs: nounwind readonly
+define hidden i8* @_ZN12MediaInfoLib27Mxf_Sequence_DataDefinitionEN6ZenLib7uint128E(%"class.ZenLib::uint128"* nocapture readonly %DataDefinition) #0 {
+entry:
+  %lo = getelementptr inbounds %"class.ZenLib::uint128"* %DataDefinition, i32 0, i32 0
+  %0 = load i64* %lo, align 1, !tbaa !7
+  %and = lshr i64 %0, 32
+  %conv = trunc i64 %and to i32
+  %and2 = lshr i64 %0, 24
+  %conv5 = and i32 %conv, 255
+  switch i32 %conv5, label %return [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb9
+  ]
+
+sw.bb:                                            ; preds = %entry
+  %conv4 = trunc i64 %and2 to i32
+  %conv6 = and i32 %conv4, 255
+  switch i32 %conv6, label %sw.default [
+    i32 1, label %return
+    i32 2, label %return
+    i32 3, label %return
+    i32 16, label %sw.bb8
+  ]
+
+sw.bb8:                                           ; preds = %sw.bb
+  br label %return
+
+sw.default:                                       ; preds = %sw.bb
+  br label %return
+
+sw.bb9:                                           ; preds = %entry
+  %1 = trunc i64 %and2 to i32
+  %conv10 = and i32 %1, 255
+  switch i32 %conv10, label %sw.default14 [
+    i32 1, label %return
+    i32 2, label %sw.bb12
+    i32 3, label %sw.bb13
+  ]
+
+sw.bb12:                                          ; preds = %sw.bb9
+  br label %return
+
+sw.bb13:                                          ; preds = %sw.bb9
+  br label %return
+
+sw.default14:                                     ; preds = %sw.bb9
+  br label %return
+
+return:                                           ; preds = %sw.default14, %sw.bb13, %sw.bb12, %sw.bb9, %sw.default, %sw.bb8, %sw.bb, %sw.bb, %sw.bb, %entry
+  %retval.0 = phi i8* [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default14 ], [ getelementptr inbounds ([5 x i8]* @.str1717689, i32 0, i32 0), %sw.bb13 ], [ getelementptr inbounds ([6 x i8]* @.str928220, i32 0, i32 0), %sw.bb12 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default ], [ getelementptr inbounds ([21 x i8]* @.str908218, i32 0, i32 0), %sw.bb8 ], [ getelementptr inbounds ([5 x i8]* @.str9210864, i32 0, i32 0), %sw.bb ], [ getelementptr inbounds ([5 x i8]* @.str9210864, i32 0, i32 0), %sw.bb ], [ getelementptr inbounds ([5 x i8]* @.str9210864, i32 0, i32 0), %sw.bb ], [ getelementptr inbounds ([8 x i8]* @.str918219, i32 0, i32 0), %sw.bb9 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %entry ]
+  ret i8* %retval.0
+}
+; main: minimal entry point; it stores 0 to a local slot, prints the @.str greeting via printf and returns 0.
+define i32 @main() {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0))
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
+
+attributes #0 = { nounwind readonly }
+
+!llvm.ident = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, 
!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0}
+!llvm.module.flags = !{!1}
+
+!0 = metadata !{metadata !"clang version 3.4 (git@github.com:kripken/emscripten-fastcomp-clang.git 406c991ba0416c838ee097361c27a12411a088b9) (https://chromium.googlesource.com/native_client/pnacl-llvm a5e8942da586a7ef0ed02361b77a3010f16428cf)"}
+!1 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!2 = metadata !{metadata !3, metadata !4, i64 8}
+!3 = metadata !{metadata !"_ZTSN6ZenLib7uint128E", metadata !4, i64 0, metadata !4, i64 8}
+!4 = metadata !{metadata !"long long", metadata !5, i64 0}
+!5 = metadata !{metadata !"omnipotent char", metadata !6, i64 0}
+!6 = metadata !{metadata !"Simple C/C++ TBAA"}
+!7 = metadata !{metadata !3, metadata !4, i64 0}
+
diff --git a/test/CodeGen/JS/expand-insertextract.ll b/test/CodeGen/JS/expand-insertextract.ll
new file mode 100644
index 00000000000..7a247380f32
--- /dev/null
+++ b/test/CodeGen/JS/expand-insertextract.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+; ext: extractelement with a non-constant index; the expected lowering spills %p to a 16-byte stack slot and loads the float at offset i*4.
+; CHECK:  sp = STACKTOP;
+; CHECK:  STACKTOP = STACKTOP + 16|0;
+; CHECK:  $0 = sp;
+; CHECK:  SIMD_float32x4_store(HEAPU8, $0, $p);
+; CHECK:  $1 = (($0) + ($i<<2)|0);
+; CHECK:  $2 = +HEAPF32[$1>>2];
+; CHECK:  STACKTOP = sp;return (+$2);
+; CHECK: }
+define float @ext(<4 x float> %p, i32 %i) {
+  %f = extractelement <4 x float> %p, i32 %i
+  ret float %f
+}
+; ins: insertelement with a non-constant index; the expected lowering spills %p to the stack, stores %f at offset i*4, then reloads the whole vector.
+; CHECK:  sp = STACKTOP;
+; CHECK:  STACKTOP = STACKTOP + 16|0;
+; CHECK:  $0 = sp;
+; CHECK:  SIMD_float32x4_store(HEAPU8, $0, $p);
+; CHECK:  $1 = (($0) + ($i<<2)|0);
+; CHECK:  HEAPF32[$1>>2] = $f;
+; CHECK:  $2 = SIMD_float32x4_load(HEAPU8, $0);
+; CHECK:  STACKTOP = sp;return (SIMD_float32x4($2));
+; CHECK: }
+define <4 x float> @ins(<4 x float> %p, float %f, i32 %i) {
+  %v = insertelement <4 x float> %p, float %f, i32 %i
+  ret <4 x float> %v
+}
diff --git a/test/CodeGen/JS/simd-loadstore.ll b/test/CodeGen/JS/simd-loadstore.ll
new file mode 100644
index 00000000000..60ed1679512
--- /dev/null
+++ b/test/CodeGen/JS/simd-loadstore.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+; fx1: <1 x float> load/fadd/store; the expected output uses the loadX/storeX forms and prints the constant's three unused lanes as 0.
+; CHECK: function _fx1($p) {
+; CHECK:  $p = $p|0;
+; CHECK:  var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0)
+; CHECK:  $t = SIMD_float32x4_loadX(HEAPU8, $p);
+; CHECK:  $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+0.5),Math_fround(+0),Math_fround(+0),Math_fround(+0)));
+; CHECK:  $q = $p;SIMD_float32x4_storeX(HEAPU8, $q, $s);
+; CHECK:  return;
+; CHECK: }
+define void @fx1(i8* %p) {
+    %q = bitcast i8* %p to <1 x float>*
+    %t = load <1 x float>* %q
+    %s = fadd <1 x float> %t, <float 0.5>
+    store <1 x float> %s, <1 x float>* %q
+    ret void
+}
+; fx2: <2 x float> load/fadd/store; the expected output uses the loadXY/storeXY forms and prints the constant's two unused lanes as 0.
+; CHECK: function _fx2($p) {
+; CHECK:  $p = $p|0;
+; CHECK:  var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0)
+; CHECK:  $t = SIMD_float32x4_loadXY(HEAPU8, $p);
+; CHECK:  $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+3.5),Math_fround(+7.5),Math_fround(+0),Math_fround(+0)));
+; CHECK:  $q = $p;SIMD_float32x4_storeXY(HEAPU8, $q, $s);
+; CHECK:  return;
+; CHECK: }
+define void @fx2(i8* %p) {
+    %q = bitcast i8* %p to <2 x float>*
+    %t = load <2 x float>* %q
+    %s = fadd <2 x float> %t, <float 3.5, float 7.5>
+    store <2 x float> %s, <2 x float>* %q
+    ret void
+}
+; fx3: <3 x float> load/fadd/store; the expected output uses the loadXYZ/storeXYZ forms and prints the constant's one unused lane as 0.
+; CHECK: function _fx3($p) {
+; CHECK:  $p = $p|0;
+; CHECK:  var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0)
+; CHECK:  $t = SIMD_float32x4_loadXYZ(HEAPU8, $p);
+; CHECK:  $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+1.5),Math_fround(+4.5),Math_fround(+6.5),Math_fround(+0)));
+; CHECK:  $q = $p;SIMD_float32x4_storeXYZ(HEAPU8, $q, $s);
+; CHECK:  return;
+; CHECK: }
+define void @fx3(i8* %p) {
+    %q = bitcast i8* %p to <3 x float>*
+    %t = load <3 x float>* %q
+    %s = fadd <3 x float> %t, <float 1.5, float 4.5, float 6.5>
+    store <3 x float> %s, <3 x float>* %q
+    ret void
+}
+; fx4: full-width <4 x float> load/fadd/store; the expected output uses the unsuffixed load/store forms with all four constant lanes populated.
+; CHECK: function _fx4($p) {
+; CHECK:  $p = $p|0;
+; CHECK:  var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0)
+; CHECK:  $t = SIMD_float32x4_load(HEAPU8, $p);
+; CHECK:  $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+9.5),Math_fround(+5.5),Math_fround(+1.5),Math_fround(+-3.5)));
+; CHECK:  $q = $p;SIMD_float32x4_store(HEAPU8, $q, $s);
+; CHECK:  return;
+; CHECK: }
+define void @fx4(i8* %p) {
+    %q = bitcast i8* %p to <4 x float>*
+    %t = load <4 x float>* %q
+    %s = fadd <4 x float> %t, <float 9.5, float 5.5, float 1.5, float -3.5>
+    store <4 x float> %s, <4 x float>* %q
+    ret void
+}
diff --git a/test/CodeGen/JS/simd-select.ll b/test/CodeGen/JS/simd-select.ll
new file mode 100644
index 00000000000..7547b199049
--- /dev/null
+++ b/test/CodeGen/JS/simd-select.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+; test0: select with a <4 x i1> condition on <4 x i32> operands; expected to be emitted as SIMD_int32x4_select.
+; CHECK: function _test0($a,$b,$cond) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  $cond = SIMD_int32x4($cond);
+; CHECK:  $cmp = SIMD_int32x4_select($cond,$a,$b);
+; CHECK:  return (SIMD_int32x4($cmp));
+; CHECK: }
+define <4 x i32> @test0(<4 x i32> %a, <4 x i32> %b, <4 x i1> %cond) nounwind {
+entry:
+  %cmp = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %cmp
+}
+; test1: select with a <4 x i1> condition on <4 x float> operands; expected to be emitted as SIMD_float32x4_select with the condition coerced as int32x4.
+; CHECK: function _test1($a,$b,$cond) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  $cond = SIMD_int32x4($cond);
+; CHECK:  $cmp = SIMD_float32x4_select($cond,$a,$b);
+; CHECK:  return (SIMD_float32x4($cmp));
+; CHECK: }
+define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %cond) nounwind {
+entry:
+  %cmp = select <4 x i1> %cond, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %cmp
+}
+; test2: select with a scalar i1 condition on <4 x i32> operands; expected to be emitted as a plain ?: expression on the whole vectors.
+; CHECK: function _test2($a,$b,$cond) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  $cond = $cond|0;
+; CHECK:  $cmp = $cond ? $a : $b;
+; CHECK:  return (SIMD_int32x4($cmp));
+; CHECK: }
+define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b, i1 %cond) nounwind {
+entry:
+  %cmp = select i1 %cond, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %cmp
+}
+; test3: select with a scalar i1 condition on <4 x float> operands; expected to be emitted as a plain ?: expression on the whole vectors.
+; CHECK: function _test3($a,$b,$cond) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  $cond = $cond|0;
+; CHECK:  $cmp = $cond ? $a : $b;
+; CHECK:  return (SIMD_float32x4($cmp));
+; CHECK: }
+define <4 x float> @test3(<4 x float> %a, <4 x float> %b, i1 %cond) nounwind {
+entry:
+  %cmp = select i1 %cond, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %cmp
+}
diff --git a/test/CodeGen/JS/simd-shift.ll b/test/CodeGen/JS/simd-shift.ll
new file mode 100644
index 00000000000..09819876fd8
--- /dev/null
+++ b/test/CodeGen/JS/simd-shift.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" ; little-endian, 32-bit pointers, 128-bit vectors with 32-bit ABI / 128-bit preferred alignment
+target triple = "asmjs-unknown-emscripten" ; asm.js / Emscripten target triple
+
+; CHECK: function _test0($a) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $shl = SIMD_int32x4_shiftLeftByScalar($a, 3);
+; CHECK:  return (SIMD_int32x4($shl));
+; CHECK: }
+define <4 x i32> @test0(<4 x i32> %a) {
+entry:
+  %shl = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3> ; constant splat amount: expected as shiftLeftByScalar with the literal 3
+  ret <4 x i32> %shl
+}
+
+; CHECK: function _test1($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = $b|0;
+; CHECK:  $shl = SIMD_int32x4_shiftLeftByScalar($a, $b);
+; CHECK:  return (SIMD_int32x4($shl));
+; CHECK: }
+define <4 x i32> @test1(<4 x i32> %a, i32 %b) {
+entry:
+  %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 ; the four inserts below build a splat of %b
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3
+  %shl = shl <4 x i32> %a, %vecinit3 ; splat amount: expected as shiftLeftByScalar($a, $b) assigned to $shl
+  ret <4 x i32> %shl
+}
+
+; CHECK: function _test2($a,$b,$c) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = $b|0;
+; CHECK:  $c = $c|0;
+; CHECK:  var $shl = SIMD_int32x4(0,0,0,0), $vecinit3 = SIMD_int32x4(0,0,0,0), label = 0, sp = 0;
+; CHECK:  $vecinit3 = SIMD_int32x4($b, $b, $c, $b);
+; CHECK:  $shl = SIMD_int32x4(($a.x|0) << ($vecinit3.x|0)|0, ($a.y|0) << ($vecinit3.y|0)|0, ($a.z|0) << ($vecinit3.z|0)|0, ($a.w|0) << ($vecinit3.w|0)|0);
+; CHECK:  return (SIMD_int32x4($shl));
+; CHECK: }
+define <4 x i32> @test2(<4 x i32> %a, i32 %b, i32 %c) {
+entry:
+  %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2 ; lane 2 takes %c, so the amounts are not a splat
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3
+  %shl = shl <4 x i32> %a, %vecinit3 ; non-splat amount: expected as four per-lane scalar << operations
+  ret <4 x i32> %shl
+}
+
+; CHECK: function _test3($a) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $shr = SIMD_int32x4_shiftRightArithmeticByScalar($a, 3);
+; CHECK:  return (SIMD_int32x4($shr));
+; CHECK: }
+define <4 x i32> @test3(<4 x i32> %a) {
+entry:
+  %shr = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3> ; constant splat amount: expected as shiftRightArithmeticByScalar with the literal 3, assigned to $shr
+  ret <4 x i32> %shr
+}
+
+; CHECK: function _test4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = $b|0;
+; CHECK:  $shr = SIMD_int32x4_shiftRightArithmeticByScalar($a, $b);
+; CHECK:  return (SIMD_int32x4($shr));
+; CHECK: }
+define <4 x i32> @test4(<4 x i32> %a, i32 %b) {
+entry:
+  %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 ; the four inserts below build a splat of %b
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3
+  %shr = ashr <4 x i32> %a, %vecinit3 ; splat amount: expected as shiftRightArithmeticByScalar($a, $b) assigned to $shr
+  ret <4 x i32> %shr
+}
+
+; CHECK: function _test5($a,$b,$c) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = $b|0;
+; CHECK:  $c = $c|0;
+; CHECK:  var $shr = SIMD_int32x4(0,0,0,0), $vecinit3 = SIMD_int32x4(0,0,0,0), label = 0, sp = 0;
+; CHECK:  $vecinit3 = SIMD_int32x4($b, $c, $b, $b);
+; CHECK:  $shr = SIMD_int32x4(($a.x|0) >> ($vecinit3.x|0)|0, ($a.y|0) >> ($vecinit3.y|0)|0, ($a.z|0) >> ($vecinit3.z|0)|0, ($a.w|0) >> ($vecinit3.w|0)|0);
+; CHECK:  return (SIMD_int32x4($shr));
+; CHECK: }
+define <4 x i32> @test5(<4 x i32> %a, i32 %b, i32 %c) {
+entry:
+  %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %c, i32 1 ; lane 1 takes %c, so the amounts are not a splat
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3
+  %shr = ashr <4 x i32> %a, %vecinit3 ; non-splat amount: expected as four per-lane scalar >> operations
+  ret <4 x i32> %shr
+}
+
+; CHECK: function _test6($a) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $lshr = SIMD_int32x4_shiftRightLogicalByScalar($a, 3);
+; CHECK:  return (SIMD_int32x4($lshr));
+; CHECK: }
+define <4 x i32> @test6(<4 x i32> %a) {
+entry:
+  %lshr = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3> ; constant splat amount: expected as shiftRightLogicalByScalar with the literal 3, assigned to $lshr
+  ret <4 x i32> %lshr
+}
+
+; CHECK: function _test7($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = $b|0;
+; CHECK:  $lshr = SIMD_int32x4_shiftRightLogicalByScalar($a, $b);
+; CHECK:  return (SIMD_int32x4($lshr));
+; CHECK: }
+define <4 x i32> @test7(<4 x i32> %a, i32 %b) {
+entry:
+  %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 ; the four inserts below build a splat of %b
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3
+  %lshr = lshr <4 x i32> %a, %vecinit3 ; splat amount: expected as shiftRightLogicalByScalar($a, $b)
+  ret <4 x i32> %lshr
+}
+
+; CHECK: function _test8($a,$b,$c) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = $b|0;
+; CHECK:  $c = $c|0;
+; CHECK:  var $lshr = SIMD_int32x4(0,0,0,0), $vecinit3 = SIMD_int32x4(0,0,0,0), label = 0, sp = 0;
+; CHECK:  $vecinit3 = SIMD_int32x4($b, $b, $b, $c);
+; CHECK:  $lshr = SIMD_int32x4(($a.x|0) >>> ($vecinit3.x|0)|0, ($a.y|0) >>> ($vecinit3.y|0)|0, ($a.z|0) >>> ($vecinit3.z|0)|0, ($a.w|0) >>> ($vecinit3.w|0)|0);
+; CHECK:  return (SIMD_int32x4($lshr));
+; CHECK: }
+define <4 x i32> @test8(<4 x i32> %a, i32 %b, i32 %c) {
+entry:
+  %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0
+  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
+  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
+  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %c, i32 3 ; lane 3 takes %c, so the amounts are not a splat
+  %lshr = lshr <4 x i32> %a, %vecinit3 ; non-splat amount: expected as four per-lane scalar >>> operations
+  ret <4 x i32> %lshr
+}
diff --git a/test/CodeGen/JS/simd-shuffle.ll b/test/CodeGen/JS/simd-shuffle.ll
new file mode 100644
index 00000000000..3e7667c56f4
--- /dev/null
+++ b/test/CodeGen/JS/simd-shuffle.ll
@@ -0,0 +1,524 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" ; little-endian, 32-bit pointers, 128-bit vectors with 32-bit ABI / 128-bit preferred alignment
+target triple = "asmjs-unknown-emscripten" ; asm.js / Emscripten target triple
+
+; CHECK: function _splat_int32x4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @splat_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32><i32 0, i32 0, i32 0, i32 0> ; every result lane reads lane 0 of %a
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _swizzle_int32x4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 3, 1, 2);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @swizzle_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32><i32 0, i32 3, i32 1, i32 2> ; all mask indices (0-3) address %a, so only $a appears in the expected swizzle
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _swizzlehi_int32x4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($b, 2, 1, 3, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @swizzlehi_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32><i32 6, i32 5, i32 7, i32 4> ; all mask indices (4-7) address %b; the expected swizzle re-bases them to 0-3 on $b
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _shuffleXY_float32x4to3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 7, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @shuffleXY_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32><i32 7, i32 0, i32 undef> ; float 4-wide inputs, 3-wide result: lane 3 of %b, lane 0 of %a, then an undef lane printed as index 0
+  ret <3 x float> %sel
+}
+
+; CHECK: function _shuffle_int32x4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 7, 0, 5, 3);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @shuffle_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32><i32 7, i32 0, i32 5, i32 3> ; mask mixes %a (0, 3) and %b (7, 5), so a two-input shuffle is expected
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _shuffleXY_int32x4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 7, 0, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @shuffleXY_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32><i32 7, i32 0, i32 undef, i32 undef> ; only the first two lanes are defined; the undef lanes are printed as index 0 in the expected call
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _splat_int32x3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @splat_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32><i32 0, i32 0, i32 0> ; 3-wide splat of lane 0 of %a; the expected swizzle still lists four indices
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _swizzle_int32x3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 2, 1, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @swizzle_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32><i32 0, i32 2, i32 1> ; 3-wide permutation of %a; the expected swizzle appends a fourth index of 0
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _swizzlehi_int32x3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($b, 0, 2, 1, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @swizzlehi_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32><i32 3, i32 5, i32 4> ; mask indices 3-5 address %b (3-wide); expected as lanes 0, 2, 1 of $b plus a fourth index of 0
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _shuffle_int32x3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 6, 0, 5);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @shuffle_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32><i32 5, i32 0, i32 4> ; mixes %b and %a; %b indices 5 and 4 (3-wide numbering) appear as 6 and 5 in the expected call
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _shuffleXY_int32x3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 6, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @shuffleXY_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32><i32 5, i32 0, i32 undef> ; lane 2 of %b (mask 5, printed as 6), lane 0 of %a, then an undef lane printed as 0
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _splat_int32x3to4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @splat_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32><i32 0, i32 0, i32 0, i32 0> ; 3-wide inputs, 4-wide result: every lane reads lane 0 of %a
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _swizzle_int32x3to4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 2, 1, 2);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @swizzle_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32><i32 0, i32 2, i32 1, i32 2> ; 3-wide inputs, 4-wide result built only from %a
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _swizzlehi_int32x3to4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($b, 2, 1, 0, 2);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @swizzlehi_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32><i32 5, i32 4, i32 3, i32 5> ; mask indices 3-5 address %b (3-wide); expected as lanes 2, 1, 0, 2 of $b
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _shuffle_int32x3to4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 6, 0, 5, 2);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @shuffle_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32><i32 5, i32 0, i32 4, i32 2> ; mixes both inputs; %b indices 5 and 4 appear as 6 and 5 in the expected call, %a indices are unchanged
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _shuffleXY_int32x3to4($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 6, 0, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <4 x i32> @shuffleXY_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32><i32 5, i32 0, i32 undef, i32 undef> ; lane 2 of %b (mask 5, printed as 6) and lane 0 of %a; the two undef lanes are printed as 0
+  ret <4 x i32> %sel
+}
+
+; CHECK: function _splat_int32x4to3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @splat_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32><i32 0, i32 0, i32 0> ; 4-wide inputs, 3-wide result: every lane reads lane 0 of %a
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _swizzle_int32x4to3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($a, 0, 3, 1, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @swizzle_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32><i32 0, i32 3, i32 1> ; 3-wide result built only from %a; the expected swizzle appends a fourth index of 0
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _swizzlehi_int32x4to3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_swizzle($b, 2, 1, 3, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @swizzlehi_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32><i32 6, i32 5, i32 7> ; mask indices 6, 5, 7 address %b; expected as lanes 2, 1, 3 of $b plus a fourth index of 0
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _shuffle_int32x4to3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 7, 0, 5);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @shuffle_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32><i32 7, i32 0, i32 5> ; mixes %b (7, 5) and %a (0); the expected shuffle lists just the three mask indices
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _shuffleXY_int32x4to3($a,$b) {
+; CHECK:  $a = SIMD_int32x4($a);
+; CHECK:  $b = SIMD_int32x4($b);
+; CHECK:  var $sel = SIMD_int32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_int32x4_shuffle($a, $b, 7, 0, 0);
+; CHECK:  return (SIMD_int32x4($sel));
+; CHECK: }
+define <3 x i32> @shuffleXY_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32><i32 7, i32 0, i32 undef> ; lane 3 of %b, lane 0 of %a, then an undef lane printed as index 0
+  ret <3 x i32> %sel
+}
+
+; CHECK: function _splat_float32x4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @splat_float32x4(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 0, i32 0, i32 0> ; every result lane reads lane 0 of %a
+  ret <4 x float> %sel
+}
+
+; CHECK: function _swizzle_float32x4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 3, 1, 2);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @swizzle_float32x4(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 3, i32 1, i32 2> ; all mask indices (0-3) address %a, so only $a appears in the expected swizzle
+  ret <4 x float> %sel
+}
+
+; CHECK: function _swizzlehi_float32x4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($b, 2, 1, 3, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @swizzlehi_float32x4(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 6, i32 5, i32 7, i32 4> ; all mask indices (4-7) address %b; the expected swizzle re-bases them to 0-3 on $b
+  ret <4 x float> %sel
+}
+
+; CHECK: function _shuffle_float32x4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 7, 0, 5, 3);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @shuffle_float32x4(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 7, i32 0, i32 5, i32 3> ; mask mixes %a (0, 3) and %b (7, 5), so a two-input shuffle is expected
+  ret <4 x float> %sel
+}
+
+; CHECK: function _shuffleXY_float32x4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 7, 0, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @shuffleXY_float32x4(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 7, i32 0, i32 undef, i32 undef> ; only the first two lanes are defined; the undef lanes are printed as index 0 in the expected call
+  ret <4 x float> %sel
+}
+
+; CHECK: function _splat_float32x3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @splat_float32x3(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32><i32 0, i32 0, i32 0> ; 3-wide splat of lane 0 of %a; the expected swizzle still lists four indices
+  ret <3 x float> %sel
+}
+
+; CHECK: function _swizzle_float32x3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 2, 1, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @swizzle_float32x3(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32><i32 0, i32 2, i32 1> ; 3-wide permutation of %a; the expected swizzle appends a fourth index of 0
+  ret <3 x float> %sel
+}
+
+; CHECK: function _swizzlehi_float32x3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($b, 0, 2, 1, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @swizzlehi_float32x3(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32><i32 3, i32 5, i32 4> ; mask indices 3-5 address %b (3-wide); expected as lanes 0, 2, 1 of $b plus a fourth index of 0
+  ret <3 x float> %sel
+}
+
+; CHECK: function _shuffle_float32x3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 6, 0, 5);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @shuffle_float32x3(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32><i32 5, i32 0, i32 4> ; mixes %b and %a; %b indices 5 and 4 (3-wide numbering) appear as 6 and 5 in the expected call
+  ret <3 x float> %sel
+}
+
+; CHECK: function _shuffleXY_float32x3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 6, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @shuffleXY_float32x3(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32><i32 5, i32 0, i32 undef> ; lane 2 of %b (mask 5, printed as 6), lane 0 of %a, then an undef lane printed as 0
+  ret <3 x float> %sel
+}
+
+; CHECK: function _splat_float32x3to4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @splat_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32><i32 0, i32 0, i32 0, i32 0> ; 3-wide inputs, 4-wide result: every lane reads lane 0 of %a
+  ret <4 x float> %sel
+}
+
+; CHECK: function _swizzle_float32x3to4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 2, 1, 2);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @swizzle_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32><i32 0, i32 2, i32 1, i32 2> ; 3-wide inputs, 4-wide result built only from %a
+  ret <4 x float> %sel
+}
+
+; CHECK: function _swizzlehi_float32x3to4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($b, 2, 1, 0, 2);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @swizzlehi_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32><i32 5, i32 4, i32 3, i32 5> ; mask indices 3-5 address %b (3-wide); expected as lanes 2, 1, 0, 2 of $b
+  ret <4 x float> %sel
+}
+
+; CHECK: function _shuffle_float32x3to4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 6, 0, 5, 2);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @shuffle_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32><i32 5, i32 0, i32 4, i32 2> ; mixes both inputs; %b indices 5 and 4 appear as 6 and 5 in the expected call, %a indices are unchanged
+  ret <4 x float> %sel
+}
+
+; CHECK: function _shuffleXY_float32x3to4($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 6, 0, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <4 x float> @shuffleXY_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32><i32 5, i32 0, i32 undef, i32 undef> ; lane 2 of %b (mask 5, printed as 6) and lane 0 of %a; the two undef lanes are printed as 0
+  ret <4 x float> %sel
+}
+
+; CHECK: function _splat_float32x4to3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 0, 0, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @splat_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32><i32 0, i32 0, i32 0> ; 4-wide inputs, 3-wide result: every lane reads lane 0 of %a
+  ret <3 x float> %sel
+}
+
+; CHECK: function _swizzle_float32x4to3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($a, 0, 3, 1, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @swizzle_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32><i32 0, i32 3, i32 1> ; 3-wide result built only from %a; the expected swizzle appends a fourth index of 0
+  ret <3 x float> %sel
+}
+
+; CHECK: function _swizzlehi_float32x4to3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_swizzle($b, 2, 1, 3, 0);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @swizzlehi_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32><i32 6, i32 5, i32 7> ; mask indices 6, 5, 7 address %b; expected as lanes 2, 1, 3 of $b plus a fourth index of 0
+  ret <3 x float> %sel
+}
+
+; CHECK: function _shuffle_float32x4to3($a,$b) {
+; CHECK:  $a = SIMD_float32x4($a);
+; CHECK:  $b = SIMD_float32x4($b);
+; CHECK:  var $sel = SIMD_float32x4(0,0,0,0)
+; CHECK:  $sel = SIMD_float32x4_shuffle($a, $b, 7, 0, 5);
+; CHECK:  return (SIMD_float32x4($sel));
+; CHECK: }
+define <3 x float> @shuffle_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32><i32 7, i32 0, i32 5> ; mixes %b (7, 5) and %a (0); the expected shuffle lists just the three mask indices
+  ret <3 x float> %sel
+}