diff --git a/emscripten-version.txt b/emscripten-version.txt index bf2b5ccfefb..87684ab8378 100644 --- a/emscripten-version.txt +++ b/emscripten-version.txt @@ -1,2 +1,2 @@ -1.13.0 +1.13.1 diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index a7a1cc2b94f..ab5eaf027c5 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -1151,14 +1151,21 @@ bool ModuleLinker::run() { DstM->setTargetTriple(SrcM->getTargetTriple()); if (!SrcM->getDataLayout().empty() && !DstM->getDataLayout().empty() && - SrcM->getDataLayout() != DstM->getDataLayout()) - errs() << "WARNING: Linking two modules of different data layouts!\n"; + SrcM->getDataLayout() != DstM->getDataLayout()) { + // XXX EMSCRIPTEN: backport r203009 from LLVM trunk: + errs() << "WARNING: Linking two modules of different data layouts: '" + << SrcM->getModuleIdentifier() << "' is '" + << SrcM->getDataLayout() << "' whereas '" + << DstM->getModuleIdentifier() << "' is '" + << DstM->getDataLayout() << "'\n"; + } if (!SrcM->getTargetTriple().empty() && DstM->getTargetTriple() != SrcM->getTargetTriple()) { - errs() << "WARNING: Linking two modules of different target triples: "; - if (!SrcM->getModuleIdentifier().empty()) - errs() << SrcM->getModuleIdentifier() << ": "; - errs() << "'" << SrcM->getTargetTriple() << "' and '" + // XXX EMSCRIPTEN: backport r203009 from LLVM trunk: + errs() << "WARNING: Linking two modules of different target triples: '" + << SrcM->getModuleIdentifier() << "' is '" + << SrcM->getTargetTriple() << "' whereas '" + << DstM->getModuleIdentifier() << "' is '" << DstM->getTargetTriple() << "'\n"; } diff --git a/lib/Target/JSBackend/AllocaManager.cpp b/lib/Target/JSBackend/AllocaManager.cpp new file mode 100644 index 00000000000..b49e6c4de13 --- /dev/null +++ b/lib/Target/JSBackend/AllocaManager.cpp @@ -0,0 +1,527 @@ +//===-- AllocaManager.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the AllocaManager class. +// +// The AllocaManager computes a frame layout, assigning every static alloca an +// offset. It does alloca liveness analysis in order to reuse stack memory, +// using lifetime intrinsics. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "allocamanager" +#include "AllocaManager.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumAllocas, "Number of allocas eliminated"); + +// Return the size of the given alloca. +uint64_t AllocaManager::getSize(const AllocaInst *AI) { + assert(AI->isStaticAlloca()); + return DL->getTypeAllocSize(AI->getAllocatedType()) * + cast<ConstantInt>(AI->getArraySize())->getValue().getZExtValue(); +} + +// Return the alignment of the given alloca.
+unsigned AllocaManager::getAlignment(const AllocaInst *AI) { + assert(AI->isStaticAlloca()); + return std::max(AI->getAlignment(), + DL->getABITypeAlignment(AI->getAllocatedType())); +} + +AllocaManager::AllocaInfo AllocaManager::getInfo(const AllocaInst *AI) { + assert(AI->isStaticAlloca()); + return AllocaInfo(AI, getSize(AI), getAlignment(AI)); +} + +// Given a lifetime_start or lifetime_end intrinsic, determine if it's +// describing a static alloca memory region suitable for our analysis. If so, +// return the alloca, otherwise return NULL. +const AllocaInst * +AllocaManager::getAllocaFromIntrinsic(const CallInst *CI) { + const IntrinsicInst *II = cast<IntrinsicInst>(CI); + assert(II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end); + + // Lifetime intrinsics have a size as their first argument and a pointer as + // their second argument. + const Value *Size = II->getArgOperand(0); + const Value *Ptr = II->getArgOperand(1); + + // Check to see if we can convert the size to a host integer. If we can't, + // it's probably not worth worrying about. + const ConstantInt *SizeCon = dyn_cast<ConstantInt>(Size); + if (!SizeCon) return NULL; + const APInt &SizeAP = SizeCon->getValue(); + if (SizeAP.getActiveBits() > 64) return NULL; + uint64_t MarkedSize = SizeAP.getZExtValue(); + + // We're only interested if the pointer is a static alloca. + const AllocaInst *AI = dyn_cast<AllocaInst>(Ptr->stripPointerCasts()); + if (!AI || !AI->isStaticAlloca()) return NULL; + + // Make sure the size covers the alloca. + if (MarkedSize < getSize(AI)) return NULL; + + return AI; +} + +int AllocaManager::AllocaSort(const void *l, const void *r) { + const AllocaInfo *li = static_cast<const AllocaInfo *>(l); + const AllocaInfo *ri = static_cast<const AllocaInfo *>(r); + + // Sort by alignment to minimize padding. + if (li->getAlignment() > ri->getAlignment()) return -1; + if (li->getAlignment() < ri->getAlignment()) return 1; + + // Ensure a stable sort. We can do this because the pointers are + // pointing into the same array. + if (li > ri) return -1; + if (li < ri) return 1; + + return 0; +} + +// Collect allocas +void AllocaManager::collectMarkedAllocas() { + NamedRegionTimer Timer("Collect Marked Allocas", "AllocaManager", + TimePassesIsEnabled); + + // Weird semantics: If an alloca *ever* appears in a lifetime start or end + // within the same function, its lifetime begins only at the explicit lifetime + // starts and ends only at the explicit lifetime ends and function exit + // points. Otherwise, its lifetime begins in the entry block and it is live + // everywhere. + // + // And so, instead of just walking the entry block to find all the static + // allocas, we walk the whole body to find the intrinsics so we can find the + // set of static allocas referenced in the intrinsics. + for (Function::const_iterator FI = F->begin(), FE = F->end(); + FI != FE; ++FI) { + for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + const CallInst *CI = dyn_cast<CallInst>(BI); + if (!CI) continue; + + const Value *Callee = CI->getCalledValue(); + if (Callee == LifetimeStart || Callee == LifetimeEnd) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + Allocas.insert(std::make_pair(AI, 0)); + } + } + } + } + + // All that said, we still want the intrinsics in the order they appear in the + // block, so that we can represent later ones with earlier ones and skip + // worrying about dominance, so run through the entry block and index those + // allocas which we identified above.
+ AllocasByIndex.reserve(Allocas.size()); + const BasicBlock *EntryBB = &F->getEntryBlock(); + for (BasicBlock::const_iterator BI = EntryBB->begin(), BE = EntryBB->end(); + BI != BE; ++BI) { + const AllocaInst *AI = dyn_cast<AllocaInst>(BI); + if (!AI || !AI->isStaticAlloca()) continue; + + AllocaMap::iterator I = Allocas.find(AI); + if (I != Allocas.end()) { + I->second = AllocasByIndex.size(); + AllocasByIndex.push_back(getInfo(AI)); + } + } + assert(AllocasByIndex.size() == Allocas.size()); +} + +// Calculate the starting point from which inter-block liveness will be +// computed. +void AllocaManager::collectBlocks() { + NamedRegionTimer Timer("Collect Blocks", "AllocaManager", + TimePassesIsEnabled); + + size_t AllocaCount = AllocasByIndex.size(); + + BitVector Seen(AllocaCount); + + for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) { + const BasicBlock *BB = I; + + BlockLifetimeInfo &BLI = BlockLiveness[BB]; + BLI.Start.resize(AllocaCount); + BLI.End.resize(AllocaCount); + + // Track which allocas we've seen. This is used because if a lifetime start + // is the first lifetime marker for an alloca in a block, the alloca is + // live-in. + Seen.reset(); + + // Walk the instructions and compute the Start and End sets. + for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + const CallInst *CI = dyn_cast<CallInst>(BI); + if (!CI) continue; + + const Value *Callee = CI->getCalledValue(); + if (Callee == LifetimeStart) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + AllocaMap::const_iterator MI = Allocas.find(AI); + if (MI != Allocas.end()) { + size_t AllocaIndex = MI->second; + if (!Seen.test(AllocaIndex)) { + BLI.Start.set(AllocaIndex); + } + BLI.End.reset(AllocaIndex); + Seen.set(AllocaIndex); + } + } + } else if (Callee == LifetimeEnd) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + AllocaMap::const_iterator MI = Allocas.find(AI); + if (MI != Allocas.end()) { + size_t AllocaIndex = MI->second; + BLI.End.set(AllocaIndex); + Seen.set(AllocaIndex); + } + } + } + } + + // Lifetimes that start in this block and do not end here are live-out. + BLI.LiveOut = BLI.Start; + BLI.LiveOut.reset(BLI.End); + if (BLI.LiveOut.any()) { + for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + InterBlockWorklist.insert(*SI); + } + } + + // Lifetimes that end in this block and do not start here are live-in. + // TODO: Is this actually true? What are the semantics of a standalone + // lifetime end? See also the code in computeInterBlockLiveness. + BLI.LiveIn = BLI.End; + BLI.LiveIn.reset(BLI.Start); + if (BLI.LiveIn.any()) { + for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + InterBlockWorklist.insert(*PI); + } + } + } +} + +// Compute the LiveIn and LiveOut sets for each block in F. +void AllocaManager::computeInterBlockLiveness() { + NamedRegionTimer Timer("Compute inter-block liveness", "AllocaManager", + TimePassesIsEnabled); + + size_t AllocaCount = AllocasByIndex.size(); + + BitVector Temp(AllocaCount); + + // This is currently using a very simple-minded bi-directional liveness + // propagation algorithm. Numerous opportunities for compile time + // speedups here. + while (!InterBlockWorklist.empty()) { + const BasicBlock *BB = InterBlockWorklist.pop_back_val(); + BlockLifetimeInfo &BLI = BlockLiveness[BB]; + + // Compute the new live-in set.
+ for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + Temp |= BlockLiveness[*PI].LiveOut; + } + + // If it contains new live blocks, prepare to propagate them. + if (Temp.test(BLI.LiveIn)) { + BLI.LiveIn |= Temp; + BitVector LiveOut = BLI.LiveOut; + BLI.LiveOut |= Temp; + BLI.LiveOut.reset(BLI.End); + // If we actually added to live-out, re-process them + if (BLI.LiveOut.test(LiveOut)) { + for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + InterBlockWorklist.insert(*SI); + } + } + } + Temp.reset(); + + // Compute the new live-out set. + for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + Temp |= BlockLiveness[*SI].LiveIn; + } + + // If it contains new live blocks, prepare to propagate them. + if (Temp.test(BLI.LiveOut)) { + // TODO: As above, what are the semantics of a standalone lifetime end? + BLI.LiveOut |= Temp; + BitVector LiveIn = BLI.LiveIn; + BLI.LiveIn |= Temp; + BLI.LiveIn.reset(BLI.Start); + // If we actually added to live-in, re-process them + if (BLI.LiveIn.test(LiveIn)) { + for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + InterBlockWorklist.insert(*PI); + } + } + } + Temp.reset(); + } +} + +// Determine overlapping live ranges within blocks. +void AllocaManager::computeIntraBlockLiveness() { + NamedRegionTimer Timer("Compute intra-block liveness", "AllocaManager", + TimePassesIsEnabled); + + size_t AllocaCount = AllocasByIndex.size(); + + BitVector Current(AllocaCount); + + AllocaCompatibility.resize(AllocaCount, BitVector(AllocaCount, true)); + + for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) { + const BasicBlock *BB = I; + const BlockLifetimeInfo &BLI = BlockLiveness[BB]; + + Current = BLI.LiveIn; + + for (int i = Current.find_first(); i >= 0; i = Current.find_next(i)) { + AllocaCompatibility[i].reset(Current); + } + + for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + const CallInst *CI = dyn_cast<CallInst>(BI); + if (!CI) continue; + + const Value *Callee = CI->getCalledValue(); + if (Callee == LifetimeStart) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + size_t AIndex = Allocas[AI]; + // We conflict with everything else that's currently live. + AllocaCompatibility[AIndex].reset(Current); + // Everything else that's currently live conflicts with us. + for (int i = Current.find_first(); i >= 0; i = Current.find_next(i)) { + AllocaCompatibility[i].reset(AIndex); + } + // We're now live. + Current.set(AIndex); + } + } else if (Callee == LifetimeEnd) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + size_t AIndex = Allocas[AI]; + // We're no longer live. + Current.reset(AIndex); + } + } + } + } +} + +// Decide which allocas will represent which other allocas, and what their +// size and alignment will need to be. +void AllocaManager::computeRepresentatives() { + NamedRegionTimer Timer("Compute Representatives", "AllocaManager", + TimePassesIsEnabled); + + for (size_t i = 0, e = AllocasByIndex.size(); i != e; ++i) { + // If we've already represented this alloca with another, don't visit it. + if (AllocasByIndex[i].isForwarded()) continue; + if (i > size_t(INT_MAX)) continue; + + // Find compatible allocas. This is a simple greedy algorithm.
+ for (int j = int(i); ; ) { + assert(j >= int(i)); + j = AllocaCompatibility[i].find_next(j); + assert(j != int(i)); + if (j < 0) break; + if (!AllocaCompatibility[j][i]) continue; + + DEBUG(dbgs() << "Allocas: " + "Representing " + << AllocasByIndex[j].getInst()->getName() << " " + "with " + << AllocasByIndex[i].getInst()->getName() << "\n"); + ++NumAllocas; + + assert(!AllocasByIndex[j].isForwarded()); + + AllocasByIndex[i].mergeSize(AllocasByIndex[j].getSize()); + AllocasByIndex[i].mergeAlignment(AllocasByIndex[j].getAlignment()); + AllocasByIndex[j].forward(i); + + AllocaCompatibility[i] &= AllocaCompatibility[j]; + AllocaCompatibility[j].reset(); + } + } +} + +void AllocaManager::computeFrameOffsets() { + NamedRegionTimer Timer("Compute Frame Offsets", "AllocaManager", + TimePassesIsEnabled); + + // Walk through the entry block and collect all the allocas, including the + // ones with no lifetime markers that we haven't looked at yet. + const BasicBlock *EntryBB = &F->getEntryBlock(); + for (BasicBlock::const_iterator BI = EntryBB->begin(), BE = EntryBB->end(); + BI != BE; ++BI) { + const AllocaInst *AI = dyn_cast<AllocaInst>(BI); + if (!AI || !AI->isStaticAlloca()) continue; + + AllocaMap::const_iterator I = Allocas.find(AI); + if (I != Allocas.end()) { + // An alloca with lifetime markers. Emit the record we've crafted for it, + // if we've chosen to keep it as a representative. + const AllocaInfo &Info = AllocasByIndex[I->second]; + if (!Info.isForwarded()) { + SortedAllocas.push_back(Info); + } + } else { + // An alloca with no lifetime markers. + SortedAllocas.push_back(getInfo(AI)); + } + } + + // Sort the allocas to hopefully reduce padding. + array_pod_sort(SortedAllocas.begin(), SortedAllocas.end(), AllocaSort); + + // Assign stack offsets. + uint64_t CurrentOffset = 0; + for (SmallVectorImpl<AllocaInfo>::const_iterator I = SortedAllocas.begin(), + E = SortedAllocas.end(); I != E; ++I) { + const AllocaInfo &Info = *I; + uint64_t NewOffset = RoundUpToAlignment(CurrentOffset, Info.getAlignment()); + + // For backwards compatibility, align every power-of-two multiple alloca to + // its greatest power-of-two factor, up to 8 bytes. In particular, cube2hash + // is known to depend on this. + // TODO: Consider disabling this and making people fix their code. + if (uint64_t Size = Info.getSize()) { + uint64_t P2 = uint64_t(1) << CountTrailingZeros_64(Size); + unsigned CompatAlign = unsigned(std::min(P2, uint64_t(8))); + NewOffset = RoundUpToAlignment(NewOffset, CompatAlign); + } + + const AllocaInst *AI = Info.getInst(); + StaticAllocas[AI] = StaticAllocation(AI, NewOffset); + + CurrentOffset = NewOffset + Info.getSize(); + } + + // Add allocas that were represented by other allocas to the StaticAllocas map + // so that our clients can look them up. + for (unsigned i = 0, e = AllocasByIndex.size(); i != e; ++i) { + const AllocaInfo &Info = AllocasByIndex[i]; + if (!Info.isForwarded()) continue; + size_t j = Info.getForwardedID(); + assert(!AllocasByIndex[j].isForwarded()); + + StaticAllocaMap::const_iterator I = + StaticAllocas.find(AllocasByIndex[j].getInst()); + assert(I != StaticAllocas.end()); + + std::pair<StaticAllocaMap::iterator, bool> Pair = + StaticAllocas.insert(std::make_pair(AllocasByIndex[i].getInst(), + I->second)); + assert(Pair.second); (void)Pair; + } + + // Record the final frame size. Keep the stack pointer 16-byte aligned.
+ FrameSize = CurrentOffset; + FrameSize = RoundUpToAlignment(FrameSize, 16); + + DEBUG(dbgs() << "Allocas: " + "Statically allocated frame size is " << FrameSize << "\n"); +} + +AllocaManager::AllocaManager() { +} + +void AllocaManager::analyze(const Function &Func, const DataLayout &Layout, + bool PerformColoring) { + NamedRegionTimer Timer("AllocaManager", TimePassesIsEnabled); + assert(Allocas.empty()); + assert(AllocasByIndex.empty()); + assert(AllocaCompatibility.empty()); + assert(BlockLiveness.empty()); + assert(StaticAllocas.empty()); + assert(SortedAllocas.empty()); + + DL = &Layout; + F = &Func; + + // Get the declarations for the lifetime intrinsics so we can quickly test to + // see if they are used at all, and for use later if they are. + const Module *M = F->getParent(); + LifetimeStart = M->getFunction(Intrinsic::getName(Intrinsic::lifetime_start)); + LifetimeEnd = M->getFunction(Intrinsic::getName(Intrinsic::lifetime_end)); + + // If we are optimizing and the module contains any lifetime intrinsics, run + // the alloca coloring algorithm. + if (PerformColoring && + ((LifetimeStart && !LifetimeStart->use_empty()) || + (LifetimeEnd && !LifetimeEnd->use_empty()))) { + + collectMarkedAllocas(); + + if (!AllocasByIndex.empty()) { + DEBUG(dbgs() << "Allocas: " + << AllocasByIndex.size() << " marked allocas found\n"); + + collectBlocks(); + computeInterBlockLiveness(); + computeIntraBlockLiveness(); + BlockLiveness.clear(); + + computeRepresentatives(); + AllocaCompatibility.clear(); + } + } + + computeFrameOffsets(); + SortedAllocas.clear(); + Allocas.clear(); + AllocasByIndex.clear(); +} + +void AllocaManager::clear() { + StaticAllocas.clear(); +} + +bool +AllocaManager::getFrameOffset(const AllocaInst *AI, uint64_t *Offset) const { + assert(AI->isStaticAlloca()); + StaticAllocaMap::const_iterator I = StaticAllocas.find(AI); + assert(I != StaticAllocas.end()); + *Offset = I->second.Offset; + return AI == I->second.Representative; +} + +const AllocaInst * +AllocaManager::getRepresentative(const AllocaInst *AI) const { + assert(AI->isStaticAlloca()); + StaticAllocaMap::const_iterator I = StaticAllocas.find(AI); + assert(I != StaticAllocas.end()); + return I->second.Representative; +} diff --git a/lib/Target/JSBackend/AllocaManager.h b/lib/Target/JSBackend/AllocaManager.h new file mode 100644 index 00000000000..44b07981bc3 --- /dev/null +++ b/lib/Target/JSBackend/AllocaManager.h @@ -0,0 +1,172 @@ +//===-- AllocaManager.h ---------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass declares the AllocaManager class. +// +//===----------------------------------------------------------------------===// + +#ifndef JSBACKEND_ALLOCAMANAGER_H +#define JSBACKEND_ALLOCAMANAGER_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SetVector.h" + +namespace llvm { + +class AllocaInst; +class BasicBlock; +class CallInst; +class DataLayout; +class Function; + +/// Compute frame layout for allocas. +class AllocaManager { + const DataLayout *DL; + const Function *LifetimeStart; + const Function *LifetimeEnd; + const Function *F; + + // Per-block lifetime information. 
+ struct BlockLifetimeInfo { + BitVector Start; + BitVector End; + BitVector LiveIn; + BitVector LiveOut; + }; + typedef DenseMap<const BasicBlock *, BlockLifetimeInfo> LivenessMap; + LivenessMap BlockLiveness; + + // Worklist for inter-block liveness analysis. + typedef SmallSetVector<const BasicBlock *, 8> InterBlockWorklistVec; + InterBlockWorklistVec InterBlockWorklist; + + // Map allocas to their index in AllocasByIndex. + typedef DenseMap<const AllocaInst *, size_t> AllocaMap; + AllocaMap Allocas; + + // Information about an alloca. Note that the size and alignment may vary + // from what's in the actual AllocaInst when an alloca is also representing + // another with perhaps greater size and/or alignment needs. + // + // When an alloca is represented by another, its AllocaInfo is marked as + // "forwarded", at which point it no longer holds a size and alignment, but + // the index of the representative AllocaInfo. + class AllocaInfo { + const AllocaInst *Inst; + uint64_t Size; + unsigned Alignment; + + public: + AllocaInfo(const AllocaInst *I, uint64_t S, unsigned A) + : Inst(I), Size(S), Alignment(A) { + assert(I != NULL); + assert(A != 0); + assert(!isForwarded()); + } + + bool isForwarded() const { return Alignment == 0; } + + size_t getForwardedID() const { + assert(isForwarded()); + return static_cast<size_t>(Size); + } + + void forward(size_t i) { + assert(!isForwarded()); + Alignment = 0; + Size = i; + assert(isForwarded()); + assert(getForwardedID() == i); + } + + const AllocaInst *getInst() const { return Inst; } + + uint64_t getSize() const { assert(!isForwarded()); return Size; } + unsigned getAlignment() const { assert(!isForwarded()); return Alignment; } + + void mergeSize(uint64_t S) { + assert(!isForwarded()); + Size = std::max(Size, S); + assert(!isForwarded()); + } + void mergeAlignment(unsigned A) { + assert(A != 0); + assert(!isForwarded()); + Alignment = std::max(Alignment, A); + assert(!isForwarded()); + } + }; + typedef SmallVector<AllocaInfo, 8> AllocaVec; + AllocaVec AllocasByIndex; + + // For each alloca, which allocas can it safely represent? Allocas are + // identified by AllocasByIndex index. + // TODO: Vector-of-vectors isn't the fastest data structure possible here. + typedef SmallVector<BitVector, 8> AllocaCompatibilityVec; + AllocaCompatibilityVec AllocaCompatibility; + + // This is for allocas that will eventually be sorted. + SmallVector<AllocaInfo, 8> SortedAllocas; + + // Static allocation results. + struct StaticAllocation { + const AllocaInst *Representative; + uint64_t Offset; + StaticAllocation() {} + StaticAllocation(const AllocaInst *A, uint64_t O) + : Representative(A), Offset(O) {} + }; + typedef DenseMap<const AllocaInst *, StaticAllocation> StaticAllocaMap; + StaticAllocaMap StaticAllocas; + uint64_t FrameSize; + + uint64_t getSize(const AllocaInst *AI); + unsigned getAlignment(const AllocaInst *AI); + AllocaInfo getInfo(const AllocaInst *AI); + const AllocaInst *getAllocaFromIntrinsic(const CallInst *CI); + static int AllocaSort(const void *l, const void *r); + + void collectMarkedAllocas(); + void collectBlocks(); + void computeInterBlockLiveness(); + void computeIntraBlockLiveness(); + void computeRepresentatives(); + void computeFrameOffsets(); + +public: + AllocaManager(); + + /// Analyze the given function and prepare for getRepresentative queries. + void analyze(const Function &Func, const DataLayout &Layout, + bool PerformColoring); + + /// Reset all stored state. + void clear(); + + /// Return the representative alloca for the given alloca. When allocas are + /// merged, one is chosen as the representative to stand for the rest.
+ /// References to the alloca should take the form of references to the + /// representative. + const AllocaInst *getRepresentative(const AllocaInst *AI) const; + + /// Set *offset to the frame offset for the given alloca. Return true if the + /// given alloca is representative, meaning that it needs an explicit + /// definition in the function entry. Return false if some other alloca + /// represents this one. + bool getFrameOffset(const AllocaInst *AI, uint64_t *offset) const; + + /// Return the total frame size for all static allocas and associated padding. + uint64_t getFrameSize() const { return FrameSize; } +}; + +} // namespace llvm + +#endif diff --git a/lib/Target/JSBackend/CMakeLists.txt b/lib/Target/JSBackend/CMakeLists.txt index 37704f316fd..24622e1c3f2 100644 --- a/lib/Target/JSBackend/CMakeLists.txt +++ b/lib/Target/JSBackend/CMakeLists.txt @@ -1,8 +1,13 @@ add_llvm_target(JSBackendCodeGen + AllocaManager.cpp ExpandI64.cpp JSBackend.cpp + JSTargetMachine.cpp Relooper.cpp SimplifyAllocas.cpp ) +add_dependencies(LLVMJSBackendCodeGen intrinsics_gen) + add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/JSBackend/CallHandlers.h b/lib/Target/JSBackend/CallHandlers.h index 93275b9637f..8ae8c26c1d8 100644 --- a/lib/Target/JSBackend/CallHandlers.h +++ b/lib/Target/JSBackend/CallHandlers.h @@ -5,7 +5,7 @@ typedef std::string (JSWriter::*CallHandler)(const Instruction*, std::string Name, int NumArgs); typedef std::map<std::string, CallHandler> CallHandlerMap; -CallHandlerMap *CallHandlers; +CallHandlerMap CallHandlers; // Definitions @@ -44,6 +44,36 @@ DEF_CALL_HANDLER(__default__, { if (F) { NeedCasts = F->isDeclaration(); // if ffi call, need casts FT = F->getFunctionType(); + if (EmscriptenAssertions) { + if (!FT->isVarArg()) { + unsigned TypeNumArgs = FT->getNumParams(); + unsigned ActualNumArgs = getNumArgOperands(CI); + if (TypeNumArgs != ActualNumArgs) { + errs().changeColor(raw_ostream::YELLOW); + errs() << "warning:"; + errs().resetColor(); + errs() << " unexpected number of arguments " << utostr(ActualNumArgs) << " in call to '" << F->getName() << "', should be " << utostr(TypeNumArgs) << "\n"; + } + for (unsigned i = 0; i < std::min(TypeNumArgs, ActualNumArgs); i++) { + Type *TypeType = FT->getParamType(i); + Type *ActualType = CI->getOperand(i)->getType(); + if (getFunctionSignatureLetter(TypeType) != getFunctionSignatureLetter(ActualType)) { + errs().changeColor(raw_ostream::YELLOW); + errs() << "warning:"; + errs().resetColor(); + errs() << " unexpected argument type " << *ActualType << " at index " << utostr(i) << " in call to '" << F->getName() << "', should be " << *TypeType << "\n"; + } + } + } + Type *TypeType = FT->getReturnType(); + Type *ActualType = CI->getType(); + if (getFunctionSignatureLetter(TypeType) != getFunctionSignatureLetter(ActualType)) { + errs().changeColor(raw_ostream::YELLOW); + errs() << "warning:"; + errs().resetColor(); + errs() << " unexpected return type " << *ActualType << " in call to '" << F->getName() << "', should be " << *TypeType << "\n"; + } + } } else { if (isAbsolute(CV)) return "abort(); /* segfault, call an absolute addr */"; // function pointer call @@ -71,8 +101,14 @@ } if (NumArgs > 0) text += ","; } - // this is an ffi call if we need casts, and it is not a Math_ builtin (with just 1 arg - Math with more args is different XXX) - bool FFI = NeedCasts && (NumArgs > 1 || Name.find("Math_") != 0); + // this is an ffi call if we need casts, and it is not a special Math_ builtin + bool FFI =
NeedCasts; + if (FFI && Name.find("Math_") == 0) { + if (Name == "Math_ceil" || Name == "Math_floor" || Name == "Math_min" || Name == "Math_max" || Name == "Math_sqrt" || Name == "Math_abs") { + // This special Math builtin is optimizable with all types, including floats, so we can treat it as non-ffi + FFI = false; + } + } unsigned FFI_OUT = FFI ? ASM_FFI_OUT : 0; for (int i = 0; i < NumArgs; i++) { if (!NeedCasts) { @@ -153,6 +189,12 @@ DEF_CALL_HANDLER(emscripten_get_longjmp_result, { return getAssign(CI) + "tempRet0"; }) +// emscripten intrinsics +DEF_CALL_HANDLER(emscripten_debugger, { + CanValidate = false; + return "debugger"; +}) + // i64 support DEF_CALL_HANDLER(getHigh32, { @@ -221,8 +263,6 @@ DEF_CALL_HANDLER(llvm_nacl_atomic_store_i32, { #define WRITE_LOOP_MAX 128 DEF_CALL_HANDLER(llvm_memcpy_p0i8_p0i8_i32, { - Declares.insert("memcpy"); - Redirects["llvm_memcpy_p0i8_p0i8_i32"] = "memcpy"; if (CI) { ConstantInt *AlignInt = dyn_cast<ConstantInt>(CI->getOperand(3)); if (AlignInt) { @@ -254,7 +294,7 @@ } else { // emit a loop UsedVars["dest"] = UsedVars["src"] = UsedVars["stop"] = Type::getInt32Ty(TheModule->getContext())->getTypeID(); - Ret += "dest=" + Dest + "+" + utostr(Pos) + "|0; src=" + Src + "+" + utostr(Pos) + "|0; stop=dest+" + utostr(CurrLen) + "|0; do { " + getHeapAccess("dest", Align) + "=" + getHeapAccess("src", Align) + "|0; dest=dest+" + utostr(Align) + "|0; src=src+" + utostr(Align) + "|0; } while ((dest|0) < (stop|0));"; + Ret += "dest=" + Dest + "+" + utostr(Pos) + "|0; src=" + Src + "+" + utostr(Pos) + "|0; stop=dest+" + utostr(CurrLen) + "|0; do { " + getHeapAccess("dest", Align) + "=" + getHeapAccess("src", Align) + "|0; dest=dest+" + utostr(Align) + "|0; src=src+" + utostr(Align) + "|0; } while ((dest|0) < (stop|0))"; } Pos += CurrLen; Len -= CurrLen; @@ -265,12 +305,11 @@ } } } + Declares.insert("memcpy"); return CH___default__(CI, "_memcpy", 3) + "|0"; }) DEF_CALL_HANDLER(llvm_memset_p0i8_i32, { - Declares.insert("memset"); - Redirects["llvm_memset_p0i8_i32"] = "memset"; if (CI) { ConstantInt *AlignInt = dyn_cast<ConstantInt>(CI->getOperand(3)); if (AlignInt) { @@ -309,7 +348,7 @@ } else { // emit a loop UsedVars["dest"] = UsedVars["stop"] = Type::getInt32Ty(TheModule->getContext())->getTypeID(); - Ret += "dest=" + Dest + "+" + utostr(Pos) + "|0; stop=dest+" + utostr(CurrLen) + "|0; do { " + getHeapAccess("dest", Align) + "=" + utostr(FullVal) + "|0; dest=dest+" + utostr(Align) + "|0; } while ((dest|0) < (stop|0));"; + Ret += "dest=" + Dest + "+" + utostr(Pos) + "|0; stop=dest+" + utostr(CurrLen) + "|0; do { " + getHeapAccess("dest", Align) + "=" + utostr(FullVal) + "|0; dest=dest+" + utostr(Align) + "|0; } while ((dest|0) < (stop|0))"; } Pos += CurrLen; Len -= CurrLen; @@ -321,12 +360,12 @@ } } } + Declares.insert("memset"); return CH___default__(CI, "_memset", 3) + "|0"; }) DEF_CALL_HANDLER(llvm_memmove_p0i8_p0i8_i32, { Declares.insert("memmove"); - Redirects["llvm_memmove_p0i8_p0i8_i32"] = "memmove"; return CH___default__(CI, "_memmove", 3) + "|0"; }) @@ -450,13 +489,11 @@ DEF_CALL_HANDLER(name, { \ /* FIXME: do not redirect if this is implemented and not just a declare!
*/ \ Declares.insert(#to); \ Redirects[#name] = #to; \ - if (!CI) return ""; \ return CH___default__(CI, "_" #to); \ }) #define DEF_BUILTIN_HANDLER(name, to) \ DEF_CALL_HANDLER(name, { \ - if (!CI) return ""; \ return CH___default__(CI, #to); \ }) @@ -702,9 +739,9 @@ DEF_REDIRECT_HANDLER(SDL_RWFromMem, SDL_RWFromConstMem); // Setups void setupCallHandlers() { - CallHandlers = new CallHandlerMap; + assert(CallHandlers.empty()); #define SETUP_CALL_HANDLER(Ident) \ - (*CallHandlers)["_" #Ident] = &JSWriter::CH_##Ident; + CallHandlers["_" #Ident] = &JSWriter::CH_##Ident; SETUP_CALL_HANDLER(__default__); SETUP_CALL_HANDLER(emscripten_preinvoke); @@ -716,6 +753,7 @@ SETUP_CALL_HANDLER(emscripten_longjmp); SETUP_CALL_HANDLER(emscripten_check_longjmp); SETUP_CALL_HANDLER(emscripten_get_longjmp_result); + SETUP_CALL_HANDLER(emscripten_debugger); SETUP_CALL_HANDLER(getHigh32); SETUP_CALL_HANDLER(setHigh32); SETUP_CALL_HANDLER(FtoILow); @@ -1006,7 +1044,9 @@ std::string handleCall(const Instruction *CI) { const Value *CV = getActuallyCalledValue(CI); - assert(!isa<InlineAsm>(CV) && "asm() not supported, use EM_ASM() (see emscripten.h)"); + if (isa<InlineAsm>(CV)) { + report_fatal_error("asm() not supported, use EM_ASM() (see emscripten.h)"); + } // Get the name to call this function by. If it's a direct call, meaning // we know which Function we're calling, avoid calling getValueAsStr, as @@ -1014,10 +1054,10 @@ const std::string &Name = isa<Function>(CV) ? getJSName(CV) : getValueAsStr(CV); unsigned NumArgs = getNumArgOperands(CI); - CallHandlerMap::iterator CH = CallHandlers->find("___default__"); + CallHandlerMap::iterator CH = CallHandlers.find("___default__"); if (isa<Function>(CV)) { - CallHandlerMap::iterator Custom = CallHandlers->find(Name); - if (Custom != CallHandlers->end()) CH = Custom; + CallHandlerMap::iterator Custom = CallHandlers.find(Name); + if (Custom != CallHandlers.end()) CH = Custom; } return (this->*(CH->second))(CI, Name, NumArgs); } diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp index 8a811b69589..d4fbdad1ee7 100644 --- a/lib/Target/JSBackend/JSBackend.cpp +++ b/lib/Target/JSBackend/JSBackend.cpp @@ -15,13 +15,14 @@ //===----------------------------------------------------------------------===// #include "JSTargetMachine.h" +#include "MCTargetDesc/JSBackendMCTargetDesc.h" +#include "AllocaManager.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Config/config.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InlineAsm.h" @@ -30,9 +31,6 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" #include "llvm/Support/CallSite.h" @@ -40,6 +38,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/DebugInfo.h" #include @@ -51,10 +50,6 @@ using namespace llvm; #include #include -#ifdef _MSC_VER -#define snprintf _snprintf -#endif - #ifdef NDEBUG #undef assert #define assert(x) { if (!(x))
report_fatal_error(#x); } @@ -75,6 +70,11 @@ ReservedFunctionPointers("emscripten-reserved-function-pointers", cl::desc("Number of reserved slots in function tables for functions to be added at runtime (see emscripten RESERVED_FUNCTION_POINTERS option)"), cl::init(0)); +static cl::opt +EmscriptenAssertions("emscripten-assertions", + cl::desc("Additional JS-specific assertions (see emscripten ASSERTIONS)"), + cl::init(0)); + extern "C" void LLVMInitializeJSBackendTarget() { // Register the target. RegisterTargetMachine X(TheJSBackendTarget); @@ -96,7 +96,6 @@ namespace { typedef std::vector HeapData; typedef std::pair Address; typedef std::map VarMap; - typedef std::map AllocaIntMap; typedef std::map GlobalAddressMap; typedef std::vector FunctionTable; typedef std::map FunctionTableMap; @@ -114,8 +113,7 @@ namespace { unsigned UniqueNum; ValueMap ValueNames; VarMap UsedVars; - AllocaIntMap StackAllocs; - unsigned TotalStackAllocs; + AllocaManager Allocas; HeapData GlobalData8; HeapData GlobalData32; HeapData GlobalData64; @@ -131,15 +129,19 @@ namespace { std::vector Exports; // additional exports BlockAddressMap BlockAddresses; + bool CanValidate; bool UsesSIMD; int InvokeState; // cycles between 0, 1 after preInvoke, 2 after call, 0 again after postInvoke. hackish, no argument there. + CodeGenOpt::Level OptLevel; DataLayout *DL; #include "CallHandlers.h" public: static char ID; - explicit JSWriter(formatted_raw_ostream &o) : ModulePass(ID), Out(o), UniqueNum(0), UsesSIMD(false), InvokeState(0) {} + JSWriter(formatted_raw_ostream &o, CodeGenOpt::Level OptLevel) + : ModulePass(ID), Out(o), UniqueNum(0), CanValidate(true), UsesSIMD(false), InvokeState(0), + OptLevel(OptLevel) {} virtual const char *getPassName() const { return "JavaScript backend"; } @@ -167,12 +169,14 @@ namespace { #define MEM_ALIGN 8 #define MEM_ALIGN_BITS 64 + #define STACK_ALIGN 16 + #define STACK_ALIGN_BITS 128 - unsigned memAlign(unsigned x) { - return x + (x%MEM_ALIGN != 0 ? MEM_ALIGN - x%MEM_ALIGN : 0); + unsigned stackAlign(unsigned x) { + return RoundUpToAlignment(x, STACK_ALIGN); } - std::string memAlignStr(std::string x) { - return "((" + x + "+" + utostr(MEM_ALIGN-1) + ")&-" + utostr(MEM_ALIGN) + ")"; + std::string stackAlignStr(std::string x) { + return "((" + x + "+" + utostr(STACK_ALIGN-1) + ")&-" + utostr(STACK_ALIGN) + ")"; } HeapData *allocateAddress(const std::string& Name, unsigned Bits = MEM_ALIGN_BITS) { @@ -258,14 +262,6 @@ namespace { } else return 'i'; } std::string getFunctionSignature(const FunctionType *F, const std::string *Name=NULL) { - if (Name) { - // special-case some function signatures, because of how we emit code for them FIXME this is hackish - if (*Name == "_llvm_memcpy_p0i8_p0i8_i32" || *Name == "_memcpy" || - *Name == "_llvm_memset_p0i8_i32" || *Name == "_memset" || - *Name == "_llvm_memmove_p0i8_p0i8_i32" || *Name == "_memmove") { - return "iiii"; - } - } std::string Ret; Ret += getFunctionSignatureLetter(F->getReturnType()); for (FunctionType::param_iterator AI = F->param_begin(), @@ -294,8 +290,8 @@ namespace { IndexedFunctions[Name] = Index; // invoke the callHandler for this, if there is one. 
the function may only be indexed but never called directly, and we may need to do things in the handler - CallHandlerMap::const_iterator CH = CallHandlers->find(Name); - if (CH != CallHandlers->end()) { + CallHandlerMap::const_iterator CH = CallHandlers.find(Name); + if (CH != CallHandlers.end()) { (this->*(CH->second))(NULL, Name, -1); } @@ -314,9 +310,26 @@ return getBlockAddress(BA->getFunction(), BA->getBasicBlock()); } + const Value *resolveFully(const Value *V) { + bool More = true; + while (More) { + More = false; + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + V = GA->getAliasee(); + More = true; + } + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + V = CE->getOperand(0); // ignore bitcasts + More = true; + } + } + return V; + } + // Return a constant we are about to write into a global as a numeric offset. If the // value is not known at compile time, emit a postSet to that location. unsigned getConstAsOffset(const Value *V, unsigned AbsoluteTarget) { + V = resolveFully(V); if (const Function *F = dyn_cast<Function>(V)) { return getFunctionIndex(F); } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(V)) { @@ -352,6 +365,7 @@ assert(VT->getElementType()->getPrimitiveSizeInBits() == 32); assert(VT->getNumElements() == 4); UsesSIMD = true; + CanValidate = false; } std::string getPtrLoad(const Value* Ptr); @@ -422,6 +436,10 @@ static inline char halfCharToHex(unsigned char half) { } static inline void sanitizeGlobal(std::string& str) { + // Global names are prefixed with "_" to prevent them from colliding with + // names of things in normal JS. + str = "_" + str; + // functions and globals should already be in C-style format, // in addition to . for llvm intrinsics and possibly $ and so forth. // There is a risk of collisions here, we just lower all these @@ -435,6 +453,10 @@ } static inline void sanitizeLocal(std::string& str) { + // Local names are prefixed with "$" to prevent them from colliding with + // global names. + str = "$" + str; + // We need to convert every string that is not a valid JS identifier into // a valid one, without collisions - we cannot turn "x.a" into "x_a" while // also leaving "x_a" as is, for example. @@ -510,7 +532,10 @@ std::string JSWriter::getPhiCode(const BasicBlock *From, const BasicBlock *To) { // we found it const std::string &name = getJSName(P); assigns[name] = getAssign(P); - const Value *V = P->getIncomingValue(index); + // Get the operand, and strip pointer casts, since normal expression + // translation also strips pointer casts, and we want to see the same + // thing so that we can detect any resulting dependencies. + const Value *V = P->getIncomingValue(index)->stripPointerCasts(); values[name] = V; std::string vname = getValueAsStr(V); if (const Instruction *VI = dyn_cast<Instruction>(V)) { @@ -556,18 +581,29 @@ const std::string &JSWriter::getJSName(const Value* val) { if (I != ValueNames.end() && I->first == val) return I->second; + // If this is an alloca we've replaced with another, use the other name.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(val)) { + if (AI->isStaticAlloca()) { + const AllocaInst *Rep = Allocas.getRepresentative(AI); + if (Rep != AI) { + return getJSName(Rep); + } + } + } + std::string name; if (val->hasName()) { - if (isa<GlobalVariable>(val) || isa<Function>(val)) { - name = std::string("_") + val->getName().str(); - sanitizeGlobal(name); - } else { - name = std::string("$") + val->getName().str(); - sanitizeLocal(name); - } + name = val->getName().str(); } else { - name = "u$" + utostr(UniqueNum++); + name = utostr(UniqueNum++); } + + if (isa<GlobalValue>(val)) { + sanitizeGlobal(name); + } else { + sanitizeLocal(name); + } + return ValueNames[val] = name; } @@ -931,6 +967,11 @@ std::string JSWriter::getConstant(const Constant* CV, AsmCast sign) { Externals.insert(Name); return Name; } + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(CV)) { + // Since we don't currently support linking of our output, we don't need + // to worry about weak or other kinds of aliases. + return getConstant(GA->getAliasee(), sign); + } return utostr(getGlobalAddress(GV->getName().str())); } @@ -998,6 +1039,9 @@ } std::string JSWriter::getValueAsStr(const Value* V, AsmCast sign) { + // Skip past no-op bitcasts and zero-index geps. + V = V->stripPointerCasts(); + if (const Constant *CV = dyn_cast<Constant>(V)) { return getConstant(CV, sign); } else { @@ -1006,6 +1050,9 @@ } std::string JSWriter::getValueAsCastStr(const Value* V, AsmCast sign) { + // Skip past no-op bitcasts and zero-index geps. + V = V->stripPointerCasts(); + if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) { return getConstant(cast<Constant>(V), sign); } else { @@ -1014,6 +1061,9 @@ } std::string JSWriter::getValueAsParenStr(const Value* V) { + // Skip past no-op bitcasts and zero-index geps. + V = V->stripPointerCasts(); + if (const Constant *CV = dyn_cast<Constant>(V)) { return getConstant(CV); } else { @@ -1022,6 +1072,9 @@ } std::string JSWriter::getValueAsCastParenStr(const Value* V, AsmCast sign) { + // Skip past no-op bitcasts and zero-index geps. + V = V->stripPointerCasts(); + if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) { return getConstant(cast<Constant>(V), sign); } else { @@ -1154,6 +1207,12 @@ static uint64_t LSBMask(unsigned numBits) { // Generate code for any operator, either an Instruction or a ConstantExpr. void JSWriter::generateExpression(const User *I, raw_string_ostream& Code) { + // To avoid emitting code and variables for the no-op pointer bitcasts + // and all-zero-index geps that LLVM needs to satisfy its type system, we + // call stripPointerCasts() on all values before translating them. This + // includes bitcasts whose only use is lifetime marker intrinsics.
+ assert(I == I->stripPointerCasts()); + Type *T = I->getType(); if (T->isIntegerTy() && T->getIntegerBitWidth() > 32) { errs() << *I << "\n"; @@ -1341,30 +1400,39 @@ break; } case Instruction::Alloca: { - if (NativizedVars.count(I)) { + const AllocaInst* AI = cast<AllocaInst>(I); + + if (NativizedVars.count(AI)) { // nativized stack variable, we just need a 'var' definition - UsedVars[getJSName(I)] = cast<PointerType>(I->getType())->getElementType()->getTypeID(); + UsedVars[getJSName(AI)] = AI->getType()->getElementType()->getTypeID(); return; } - const AllocaInst* AI = cast<AllocaInst>(I); - AllocaIntMap::iterator AIMI = StackAllocs.find(AI); - if (AIMI != StackAllocs.end()) { - // fixed-size allocation that is already taken into account in the big initial allocation - if (AIMI->second) { - Code << getAssign(AI) << "sp + " << utostr(AIMI->second) << "|0"; - } else { - Code << getAssign(AI) << "sp"; + + // Fixed-size entry-block allocations are allocated all at once in the + // function prologue. + if (AI->isStaticAlloca()) { + uint64_t Offset; + if (Allocas.getFrameOffset(AI, &Offset)) { + if (Offset != 0) { + Code << getAssign(AI) << "sp + " << Offset << "|0"; + } else { + Code << getAssign(AI) << "sp"; + } + break; } - break; + // Otherwise, this alloca is being represented by another alloca, so + // there's nothing to print. + return; } + Type *T = AI->getAllocatedType(); std::string Size; uint64_t BaseSize = DL->getTypeAllocSize(T); const Value *AS = AI->getArraySize(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(AS)) { - Size = Twine(memAlign(BaseSize * CI->getZExtValue())).str(); + Size = Twine(stackAlign(BaseSize * CI->getZExtValue())).str(); } else { - Size = memAlignStr("((" + utostr(BaseSize) + '*' + getValueAsStr(AS) + ")|0)"); + Size = stackAlignStr("((" + utostr(BaseSize) + '*' + getValueAsStr(AS) + ")|0)"); } Code << getAssign(AI) << "STACKTOP; STACKTOP = STACKTOP + " << Size << "|0"; break; @@ -1463,7 +1531,7 @@ case Instruction::ZExt: Code << getValueAsCastStr(I->getOperand(0), ASM_UNSIGNED); break; case Instruction::FPExt: { if (PreciseF32) { - Code << "+" + getValueAsStr(I->getOperand(0)); break; + Code << "+" << getValueAsStr(I->getOperand(0)); break; } else { Code << getValueAsStr(I->getOperand(0)); break; } @@ -1491,10 +1559,10 @@ std::string V = getValueAsStr(I->getOperand(0)); if (InType->isIntegerTy() && OutType->isFloatingPointTy()) { assert(InType->getIntegerBitWidth() == 32); - Code << "(HEAP32[tempDoublePtr>>2]=" << V << "," << getCast("HEAPF32[tempDoublePtr>>2]", Type::getFloatTy(TheModule->getContext())) + ")"; + Code << "(HEAP32[tempDoublePtr>>2]=" << V << "," << getCast("HEAPF32[tempDoublePtr>>2]", Type::getFloatTy(TheModule->getContext())) << ")"; } else if (OutType->isIntegerTy() && InType->isFloatingPointTy()) { assert(OutType->getIntegerBitWidth() == 32); - Code << "(HEAPF32[tempDoublePtr>>2]=" << V << "," << "HEAP32[tempDoublePtr>>2]|0)"; + Code << "(HEAPF32[tempDoublePtr>>2]=" << V << "," "HEAP32[tempDoublePtr>>2]|0)"; } else { Code << V; } @@ -1586,7 +1654,9 @@ void JSWriter::addBlock(const BasicBlock *BB, Relooper& R, LLVMToRelooperMap& LL raw_string_ostream CodeStream(Code); for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - generateExpression(I, CodeStream); + if (I->stripPointerCasts() == I) { + generateExpression(I, CodeStream); +
} } CodeStream.flush(); const Value* Condition = considerConditionVar(BB->getTerminator()); @@ -1746,8 +1816,8 @@ void JSWriter::printFunctionBody(const Function *F) { // Emit stack entry Out << " " << getAdHocAssign("sp", Type::getInt32Ty(F->getContext())) << "STACKTOP;"; - if (TotalStackAllocs) { - Out << "\n " << "STACKTOP = STACKTOP + " + utostr(TotalStackAllocs) + "|0;"; + if (uint64_t FrameSize = Allocas.getFrameSize()) { + Out << "\n " "STACKTOP = STACKTOP + " << FrameSize << "|0;"; } // Emit (relooped) code @@ -1786,59 +1856,24 @@ void JSWriter::processConstants() { void JSWriter::printFunction(const Function *F) { ValueNames.clear(); - // Ensure all arguments and locals are named (we assume used values need names, which might be false if the optimizer did not run) - unsigned Next = 1; - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) { - if (!AI->hasName() && !AI->use_empty()) { - ValueNames[AI] = "$" + utostr(Next++); - } - } - for (Function::const_iterator BI = F->begin(), BE = F->end(); - BI != BE; ++BI) { - for (BasicBlock::const_iterator II = BI->begin(), E = BI->end(); - II != E; ++II) { - if (!II->hasName() && !II->use_empty()) { - ValueNames[II] = "$" + utostr(Next++); - } - } - } - // Prepare and analyze function UsedVars.clear(); UniqueNum = 0; - calculateNativizedVars(F); - StackAllocs.clear(); - TotalStackAllocs = 0; + // When optimizing, the regular optimizer (mem2reg, SROA, GVN, and others) + // will have already taken all the opportunities for nativization. + if (OptLevel == CodeGenOpt::None) + calculateNativizedVars(F); - for (Function::const_iterator BI = F->begin(), BE = F->end(); BI != BE; ++BI) { - for (BasicBlock::const_iterator II = BI->begin(), E = BI->end(); II != E; ++II) { - if (const AllocaInst* AI = dyn_cast(II)) { - Type *T = AI->getAllocatedType(); - const Value *AS = AI->getArraySize(); - unsigned BaseSize = DL->getTypeAllocSize(T); - if (const ConstantInt *CI = dyn_cast(AS)) { - // TODO: group by alignment to avoid unnecessary padding - unsigned Size = memAlign(BaseSize * CI->getZExtValue()); - StackAllocs[AI] = TotalStackAllocs; - TotalStackAllocs += Size; - } - } else { - // stop after the first non-alloca - could alter the stack - // however, ptrtoints are ok, and the legalizaton passes introduce them - if (!isa(II)) break; - } - } - break; - } + // Do alloca coloring at -O1 and higher. + Allocas.analyze(*F, *DL, OptLevel != CodeGenOpt::None); // Emit the function std::string Name = F->getName(); sanitizeGlobal(Name); - Out << "function _" << Name << "("; + Out << "function " << Name << "("; for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI) { if (AI != F->arg_begin()) Out << ","; @@ -1855,6 +1890,8 @@ void JSWriter::printFunction(const Function *F) { printFunctionBody(F); Out << "}"; nl(Out); + + Allocas.clear(); } void JSWriter::printModuleBody() { @@ -1864,21 +1901,6 @@ void JSWriter::printModuleBody() { nl(Out) << "// EMSCRIPTEN_START_FUNCTIONS"; nl(Out); for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); I != E; ++I) { - // Ignore intrinsics that are always no-ops. We don't emit any code for - // them, so we don't need to declare them. 
- if (I->isIntrinsic()) { - switch (I->getIntrinsicID()) { - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::prefetch: - continue; - } - } - if (!I->isDeclaration()) printFunction(I); } Out << "function runPostSets() {\n"; @@ -1911,6 +1933,26 @@ void JSWriter::printModuleBody() { for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); I != E; ++I) { if (I->isDeclaration() && !I->use_empty()) { + // Ignore intrinsics that are always no-ops or expanded into other code + // which doesn't require the intrinsic function itself to be declared. + if (I->isIntrinsic()) { + switch (I->getIntrinsicID()) { + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::prefetch: + case Intrinsic::memcpy: + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::expect: + case Intrinsic::flt_rounds: + continue; + } + } + if (first) { first = false; } else { @@ -2014,6 +2056,10 @@ void JSWriter::printModuleBody() { } Out << "],"; + Out << "\"canValidate\": "; + Out << (CanValidate ? "1" : "0"); + Out << ","; + Out << "\"simd\": "; Out << (UsesSIMD ? "1" : "0"); Out << ","; @@ -2089,8 +2135,6 @@ void JSWriter::parseConstant(const std::string& name, const Constant* CV, bool c } else if (isa(CV)) { if (calculate) { unsigned Bytes = DL->getTypeStoreSize(CV->getType()); - // FIXME: assume full 64-bit alignment for now - Bytes = memAlign(Bytes); HeapData *GlobalData = allocateAddress(name); for (unsigned i = 0; i < Bytes; ++i) { GlobalData->push_back(0); @@ -2319,9 +2363,16 @@ bool JSTargetMachine::addPassesToEmitFile(PassManagerBase &PM, assert(FileType == TargetMachine::CGFT_AssemblyFile); PM.add(createExpandI64Pass()); - PM.add(createSimplifyAllocasPass()); - PM.add(new JSWriter(o)); + + CodeGenOpt::Level OptLevel = getOptLevel(); + + // When optimizing, there shouldn't be any opportunities for SimplifyAllocas + // because the regular optimizer should have taken them all (GVN, and possibly + // also SROA). 
+ if (OptLevel == CodeGenOpt::None) + PM.add(createSimplifyAllocasPass()); + + PM.add(new JSWriter(o, OptLevel)); return false; } - diff --git a/lib/Target/JSBackend/JSTargetMachine.cpp b/lib/Target/JSBackend/JSTargetMachine.cpp new file mode 100644 index 00000000000..af428f2eb2d --- /dev/null +++ b/lib/Target/JSBackend/JSTargetMachine.cpp @@ -0,0 +1,14 @@ +#include "JSTargetMachine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/PassManager.h" +using namespace llvm; + +JSTargetMachine::JSTargetMachine(const Target &T, StringRef Triple, + StringRef CPU, StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : TargetMachine(T, Triple, CPU, FS, Options), + DL("e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-" + "f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128") { + CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); +} diff --git a/lib/Target/JSBackend/JSTargetMachine.h b/lib/Target/JSBackend/JSTargetMachine.h index b11533d6922..3912d3b5b29 100644 --- a/lib/Target/JSBackend/JSTargetMachine.h +++ b/lib/Target/JSBackend/JSTargetMachine.h @@ -1,4 +1,4 @@ -//===-- JSTargetMachine.h - TargetMachine for the C++ backend --*- C++ -*-===// +//===-- JSTargetMachine.h - TargetMachine for the JS Backend ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -22,12 +22,14 @@ namespace llvm { class formatted_raw_ostream; -struct JSTargetMachine : public TargetMachine { - JSTargetMachine(const Target &T, StringRef TT, +class JSTargetMachine : public TargetMachine { + const DataLayout DL; + +public: + JSTargetMachine(const Target &T, StringRef Triple, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : TargetMachine(T, TT, CPU, FS, Options) {} + CodeGenOpt::Level OL); virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, @@ -36,12 +38,9 @@ struct JSTargetMachine : public TargetMachine { AnalysisID StartAfter, AnalysisID StopAfter); - virtual const DataLayout *getDataLayout() const { return 0; } + virtual const DataLayout *getDataLayout() const { return &DL; } }; -extern Target TheJSBackendTarget; - } // End llvm namespace - #endif diff --git a/lib/Target/JSBackend/LLVMBuild.txt b/lib/Target/JSBackend/LLVMBuild.txt index 33b433bfb7b..6601b0306ae 100644 --- a/lib/Target/JSBackend/LLVMBuild.txt +++ b/lib/Target/JSBackend/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = TargetInfo +subdirectories = MCTargetDesc TargetInfo [component_0] type = TargetGroup @@ -27,5 +27,5 @@ parent = Target type = Library name = JSBackendCodeGen parent = JSBackend -required_libraries = Core JSBackendInfo Support Target +required_libraries = Core JSBackendInfo JSBackendDesc Support Target add_to_library_groups = JSBackend diff --git a/lib/Target/JSBackend/MCTargetDesc/CMakeLists.txt b/lib/Target/JSBackend/MCTargetDesc/CMakeLists.txt new file mode 100644 index 00000000000..81c5eadef6a --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_library(LLVMJSBackendDesc + JSBackendMCTargetDesc.cpp + ) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) 
diff --git a/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.cpp b/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.cpp new file mode 100644 index 00000000000..f7ba0686839 --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.cpp @@ -0,0 +1,31 @@ +//===-- JSBackendMCTargetDesc.cpp - JS Backend Target Descriptions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides asm.js specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "JSBackendMCTargetDesc.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +static MCCodeGenInfo *createJSBackendMCCodeGenInfo(StringRef Triple, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +// Force static initialization. +extern "C" void LLVMInitializeJSBackendTargetMC() { + // Register the MC codegen info. + RegisterMCCodeGenInfoFn C(TheJSBackendTarget, createJSBackendMCCodeGenInfo); +} diff --git a/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.h b/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.h new file mode 100644 index 00000000000..c98a55df83b --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.h @@ -0,0 +1,25 @@ +//===- JSBackendMCTargetDesc.h - JS Backend Target Descriptions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides asm.js specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef JSBACKENDMCTARGETDESC_H +#define JSBACKENDMCTARGETDESC_H + +#include "llvm/Support/TargetRegistry.h" + +namespace llvm { + +extern Target TheJSBackendTarget; + +} // End llvm namespace + +#endif diff --git a/lib/Target/JSBackend/MCTargetDesc/LLVMBuild.txt b/lib/Target/JSBackend/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 00000000000..91127251c9d --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/JSBackend/MCTargetDesc/LLVMBuild.txt --------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = JSBackendDesc +parent = JSBackend +required_libraries = MC Support JSBackendInfo +add_to_library_groups = JSBackend diff --git a/lib/Target/JSBackend/MCTargetDesc/Makefile b/lib/Target/JSBackend/MCTargetDesc/Makefile new file mode 100644 index 00000000000..9bf7e902aff --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/JSBackend/MCTargetDesc/Makefile ----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMJSBackendDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/JSBackend/Makefile b/lib/Target/JSBackend/Makefile index 8a15e7f4c18..8d8336bdbd4 100644 --- a/lib/Target/JSBackend/Makefile +++ b/lib/Target/JSBackend/Makefile @@ -9,7 +9,7 @@ LEVEL = ../../.. LIBRARYNAME = LLVMJSBackendCodeGen -DIRS = TargetInfo +DIRS = MCTargetDesc TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/JSBackend/OptPasses.h b/lib/Target/JSBackend/OptPasses.h index 81e3b5ed184..2f90b568b01 100644 --- a/lib/Target/JSBackend/OptPasses.h +++ b/lib/Target/JSBackend/OptPasses.h @@ -1,4 +1,4 @@ -//===-- JSTargetMachine.h - TargetMachine for the C++ backend --*- C++ -*-===// +//===-- OptPasses.h - Extra passes for the JS Backend -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/JSBackend/SimplifyAllocas.cpp b/lib/Target/JSBackend/SimplifyAllocas.cpp index 858ded32a10..a6c090e7e44 100644 --- a/lib/Target/JSBackend/SimplifyAllocas.cpp +++ b/lib/Target/JSBackend/SimplifyAllocas.cpp @@ -1,4 +1,4 @@ -//===-- SimplifyAllocas.cpp - TargetMachine for the C++ backend --*- C++ -*-===// +//===-- SimplifyAllocas.cpp - Alloca optimization ---------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,29 +6,23 @@ // License. See LICENSE.TXT for details. // //===-----------------------------------------------------------------------===// +// +// There shouldn't be any opportunities for this pass to do anything if the +// regular LLVM optimizer passes are run. However, it does make things nicer +// at -O0. +// +//===-----------------------------------------------------------------------===// -#include +#include "OptPasses.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Function.h" -// XXX -#include "llvm/Support/FormattedStream.h" -#include <stdio.h> -#define dump(x) fprintf(stderr, x "\n") -#define dumpv(x, ...) fprintf(stderr, x "\n", __VA_ARGS__) -#define dumpfail(x) { fprintf(stderr, x "\n"); fprintf(stderr, "%s : %d\n", __FILE__, __LINE__); report_fatal_error("fail"); } -#define dumpfailv(x, ...)
{ fprintf(stderr, x "\n", __VA_ARGS__); fprintf(stderr, "%s : %d\n", __FILE__, __LINE__); report_fatal_error("fail"); } -#define dumpIR(value) { \ - std::string temp; \ - raw_string_ostream stream(temp); \ - stream << *(value); \ - fprintf(stderr, "%s\n", temp.c_str()); \ -} +#ifdef NDEBUG #undef assert -#define assert(x) { if (!(x)) dumpfail(#x); } -// XXX +#define assert(x) { if (!(x)) report_fatal_error(#x); } +#endif namespace llvm { @@ -115,4 +109,3 @@ extern FunctionPass *createSimplifyAllocasPass() { } } // End llvm namespace - diff --git a/lib/Target/JSBackend/TargetInfo/JSBackendTargetInfo.cpp b/lib/Target/JSBackend/TargetInfo/JSBackendTargetInfo.cpp index cf06eaceea8..66a3f4d6e83 100644 --- a/lib/Target/JSBackend/TargetInfo/JSBackendTargetInfo.cpp +++ b/lib/Target/JSBackend/TargetInfo/JSBackendTargetInfo.cpp @@ -8,6 +8,7 @@ //===--------------------------------------------------------------------===// #include "JSTargetMachine.h" +#include "MCTargetDesc/JSBackendMCTargetDesc.h" #include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -15,8 +16,21 @@ using namespace llvm; Target llvm::TheJSBackendTarget; static unsigned JSBackend_TripleMatchQuality(const std::string &TT) { - // This class always works, but shouldn't be the default in most cases. - return 1; + switch (Triple(TT).getArch()) { + case Triple::asmjs: + // That's us! + return 20; + + case Triple::le32: + case Triple::x86: + // For compatibility with older versions of Emscripten, we also basically + // support generating code for le32-unknown-nacl and i386-pc-linux-gnu, + // but we use a low number here so that we're not the default. + return 1; + + default: + return 0; + } } extern "C" void LLVMInitializeJSBackendTargetInfo() { @@ -24,5 +38,3 @@ extern "C" void LLVMInitializeJSBackendTargetInfo() { "JavaScript (asm.js, emscripten) backend", &JSBackend_TripleMatchQuality); } - -extern "C" void LLVMInitializeJSBackendTargetMC() {} diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index cbea844e072..0e628c08a6e 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -37,6 +37,7 @@ #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetLibraryInfo.h" #include <algorithm> using namespace llvm; @@ -473,9 +474,17 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, DataLayout *TD, TargetLibraryInfo *TLI) { bool Changed = false; - SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end()); + // Note that we need to use a weak value handle for the worklist items. When + // we delete a constant array, we may also be holding a pointer to one of its + // elements (or an element of one of its elements if we're dealing with an + // array of arrays) in the worklist.
+ SmallVector<WeakVH, 8> WorkList(V->use_begin(), V->use_end()); while (!WorkList.empty()) { - User *U = WorkList.pop_back_val(); + Value *UV = WorkList.pop_back_val(); + if (!UV) + continue; + + User *U = cast<User>(UV); if (LoadInst *LI = dyn_cast<LoadInst>(U)) { if (Init) { diff --git a/lib/Transforms/NaCl/ExpandVarArgs.cpp b/lib/Transforms/NaCl/ExpandVarArgs.cpp index 1b605b79ee2..6f41046850a 100644 --- a/lib/Transforms/NaCl/ExpandVarArgs.cpp +++ b/lib/Transforms/NaCl/ExpandVarArgs.cpp @@ -240,7 +240,8 @@ static bool ExpandVarArgCall(InstType *Call, DataLayout *DL) { // start of the function so that we don't leak space if the function // is called in a loop. Function *Func = Call->getParent()->getParent(); - Instruction *Buf = new AllocaInst(VarArgsTy, "vararg_buffer"); + AllocaInst *Buf = new AllocaInst(VarArgsTy, "vararg_buffer"); + Buf->setAlignment(8); // XXX EMSCRIPTEN: Align for 8-byte aligned doubles. Func->getEntryBlock().getInstList().push_front(Buf); // Call llvm.lifetime.start/end intrinsics to indicate that Buf is diff --git a/lib/Transforms/NaCl/LowerEmExceptionsPass.cpp b/lib/Transforms/NaCl/LowerEmExceptionsPass.cpp index ac07530ab88..3f6f18274fd 100644 --- a/lib/Transforms/NaCl/LowerEmExceptionsPass.cpp +++ b/lib/Transforms/NaCl/LowerEmExceptionsPass.cpp @@ -99,19 +99,19 @@ bool LowerEmExceptions::runOnModule(Module &M) { Type *i8P = i8->getPointerTo(); Type *Void = Type::getVoidTy(M.getContext()); - if (!TheModule->getFunction("getHigh32")) { + if (!(GetHigh = TheModule->getFunction("getHigh32"))) { FunctionType *GetHighFunc = FunctionType::get(i32, false); GetHigh = Function::Create(GetHighFunc, GlobalValue::ExternalLinkage, "getHigh32", TheModule); } - FunctionType *VoidFunc = FunctionType::get(Void, false); - if (!TheModule->getFunction("emscripten_preinvoke")) { + if (!(PreInvoke = TheModule->getFunction("emscripten_preinvoke"))) { + FunctionType *VoidFunc = FunctionType::get(Void, false); PreInvoke = Function::Create(VoidFunc, GlobalValue::ExternalLinkage, "emscripten_preinvoke", TheModule); } - FunctionType *IntFunc = FunctionType::get(i32, false); - if (!TheModule->getFunction("emscripten_postinvoke")) { + if (!(PostInvoke = TheModule->getFunction("emscripten_postinvoke"))) { + FunctionType *IntFunc = FunctionType::get(i32, false); PostInvoke = Function::Create(IntFunc, GlobalValue::ExternalLinkage, "emscripten_postinvoke", TheModule); } diff --git a/lib/Transforms/NaCl/PNaClABISimplify.cpp b/lib/Transforms/NaCl/PNaClABISimplify.cpp index 4deee01a2b9..886e0cc4cce 100644 --- a/lib/Transforms/NaCl/PNaClABISimplify.cpp +++ b/lib/Transforms/NaCl/PNaClABISimplify.cpp @@ -60,7 +60,9 @@ void llvm::PNaClABISimplifyAddPreOptPasses(PassManager &PM) { // LowerExpect converts Intrinsic::expect into branch weights, // which can then be removed after BlockPlacement. +#if 0 // XXX EMSCRIPTEN: We support the expect intrinsic. PM.add(createLowerExpectIntrinsicPass()); +#endif #if 0 // XXX EMSCRIPTEN: We don't need this. // Rewrite unsupported intrinsics to simpler and portable constructs. PM.add(createRewriteLLVMIntrinsicsPass()); @@ -75,13 +77,17 @@ void llvm::PNaClABISimplifyAddPreOptPasses(PassManager &PM) { PM.add(createExpandVarArgsPass()); PM.add(createExpandCtorsPass()); +#if 0 // XXX EMSCRIPTEN: We handle aliases. PM.add(createResolveAliasesPass()); +#endif #if 0 // EMSCRIPTEN: no need for tls PM.add(createExpandTlsPass()); #endif // GlobalCleanup needs to run after ExpandTls because // __tls_template_start etc.
are extern_weak before expansion +#if 0 // XXX EMSCRIPTEN: We don't currently have tls, and we don't have the same complications with extern_weak PM.add(createGlobalCleanupPass()); +#endif } void llvm::PNaClABISimplifyAddPostOptPasses(PassManager &PM) { @@ -138,7 +144,9 @@ void llvm::PNaClABISimplifyAddPostOptPasses(PassManager &PM) { // Remove ``asm("":::"memory")``. This must occur after rewriting // atomics: a ``fence seq_cst`` surrounded by ``asm("":::"memory")`` // has special meaning and is translated differently. +#if 0 // XXX EMSCRIPTEN: asm("":::"memory") doesn't have special semantics. PM.add(createRemoveAsmMemoryPass()); +#endif #if 0 // XXX EMSCRIPTEN: PNaCl replaces pointers with ints to simplify their ABI; emscripten doesn't need this. // ReplacePtrsWithInts assumes that getelementptr instructions and // ConstantExprs have already been expanded out. @@ -156,7 +164,9 @@ void llvm::PNaClABISimplifyAddPostOptPasses(PassManager &PM) { // Strip dead prototypes to appease the intrinsic ABI checks. // ExpandVarArgs leaves around vararg intrinsics, and // ReplacePtrsWithInts leaves the lifetime.start/end intrinsics. +#if 0 // XXX EMSCRIPTEN: We just ignore dead prototypes. PM.add(createStripDeadPrototypesPass()); +#endif // Eliminate simple dead code that the post-opt passes could have // created. diff --git a/lib/Transforms/NaCl/PromoteIntegers.cpp b/lib/Transforms/NaCl/PromoteIntegers.cpp index af34faa7e55..ed374da3dda 100644 --- a/lib/Transforms/NaCl/PromoteIntegers.cpp +++ b/lib/Transforms/NaCl/PromoteIntegers.cpp @@ -577,18 +577,38 @@ void PromoteIntegers::convertInstruction(Instruction *Inst, ConversionState &Sta State.getConverted(Binop->getOperand(1)), Binop->getName() + ".result", Binop), Binop); break; + // XXX EMSCRIPTEN: Implement {U,S}{Div,Rem} + case Instruction::UDiv: + case Instruction::URem: + NewInst = CopyDebug(BinaryOperator::Create( + Binop->getOpcode(), + getClearConverted(Binop->getOperand(0), + Binop, + State), + getClearConverted(Binop->getOperand(1), + Binop, + State), + Binop->getName() + ".result", Binop), Binop); + break; + case Instruction::SDiv: + case Instruction::SRem: + NewInst = CopyDebug(BinaryOperator::Create( + Binop->getOpcode(), + getSignExtend(State.getConverted(Binop->getOperand(0)), + Binop->getOperand(0), + Binop), + getSignExtend(State.getConverted(Binop->getOperand(1)), + Binop->getOperand(0), + Binop), + Binop->getName() + ".result", Binop), Binop); + break; case Instruction::FAdd: case Instruction::FSub: case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: case Instruction::FRem: case Instruction::BinaryOpsEnd: // We should not see FP operators here. - // We don't handle div.
errs() << *Inst << "\n"; llvm_unreachable("Cannot handle binary operator"); break; diff --git a/test/CodeGen/JS/aliases.ll b/test/CodeGen/JS/aliases.ll new file mode 100644 index 00000000000..7818f0de8b9 --- /dev/null +++ b/test/CodeGen/JS/aliases.ll @@ -0,0 +1,53 @@ +; RUN: llc -march=js -o - < %s | FileCheck %s + +; ModuleID = 'test/CodeGen/JS/aliases.ll' +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +@.str = private unnamed_addr constant [18 x i8] c"hello, world! %d\0A\00", align 1 ; [#uses=1 type=[18 x i8]*] + +@othername = alias internal void (i32)* @doit +@othername2 = alias internal void (i32)* @othername +@othername3 = alias internal void (i32)* @othername2 +@othername4 = alias internal bitcast (void (i32)* @othername2 to void ()*) + +@list = global i32 ptrtoint (void ()* @othername4 to i32) +@list2 = global <{ i32, i32, i32, i32, i32 }> <{ i32 ptrtoint (void (i32)* @doit to i32), i32 ptrtoint (void (i32)* @othername to i32), i32 ptrtoint (void (i32)* @othername2 to i32), i32 ptrtoint (void (i32)* @othername3 to i32), i32 ptrtoint (void ()* @othername4 to i32) }> + + +@value = global i32 17 +@value2 = alias i32* @value +@value3 = alias i32* @value + +define internal void @doit(i32 %x) { + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str, i32 0, i32 0), i32 %x) ; [#uses=0 type=i32] + ret void +} + +;;; we just check for compilation to succeed here, specifically of @list and @list2 +; CHECK: function _main() { +; CHECK: } + +define i32 @main() { +entry: + call void ()* @othername4() + %fp = ptrtoint void ()* @othername4 to i32 + %fp1 = add i32 %fp, 0 + %pf = inttoptr i32 %fp1 to void (i32)* + %x = load i32* @value3 + call void (i32)* %pf(i32 %x) + %x1 = load i32* @value2 + call void (i32)* @othername3(i32 %x1) + %x2 = load i32* @value + call void (i32)* @othername2(i32 %x2) + store i32 18, i32* @value + %x3 = load i32* @value + call void (i32)* @othername(i32 %x3) + store i32 19, i32* @value3 + %x4 = load i32* @value3 + call void (i32)* @doit(i32 %x4) + ret i32 1 +} + +declare i32 @printf(i8*, ...) + diff --git a/test/CodeGen/JS/allocamanager.ll b/test/CodeGen/JS/allocamanager.ll new file mode 100644 index 00000000000..c2f7c5f53d6 --- /dev/null +++ b/test/CodeGen/JS/allocamanager.ll @@ -0,0 +1,166 @@ +; RUN: llc -march=js -o - < %s | FileCheck %s + +; Basic AllocaManager feature test. Eliminate user variable cupcake in favor of +; user variable muffin, combine all the vararg buffers, and align the stack +; pointer.
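+; (Roughly: %muffin (117 bytes) and %cupcake (119 bytes) have disjoint
+; lifetimes and can share a single slot, and the four 8-byte-aligned vararg
+; buffers collapse into one, which is how everything below fits in the
+; aligned 128-byte frame the CHECK lines expect.)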
+ +; ModuleID = 'test/CodeGen/JS/allocamanager.ll' +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +%struct._IO_FILE = type opaque + +@stderr = external constant [4 x i8], align 4 +@.str = private unnamed_addr constant [26 x i8] c"hello from %s; argc is %d\00", align 1 +@.str1 = private unnamed_addr constant [33 x i8] c"message from the program: \22%s\22!\0A\00", align 1 +@.str2 = private unnamed_addr constant [38 x i8] c"with argc %d, I, %s, must say goodbye\00", align 1 +@.str3 = private unnamed_addr constant [43 x i8] c"another message from the program: \22%s\22...\0A\00", align 1 + +; CHECK: function _foo($argc,$argv) { +; CHECK-NOT: cupcake +; CHECK: STACKTOP = STACKTOP + 128|0; +; CHECK-NEXT: vararg_buffer0 = +; CHECK-NEXT: $muffin = +; CHECK-NOT: cupcake +; CHECK: } + +; Function Attrs: nounwind +define void @foo(i32 %argc, i8** %argv) #0 { +entry: + %vararg_buffer0 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast10 = bitcast <{ i8* }>* %vararg_buffer0 to i8* + %vararg_buffer5 = alloca <{ i32, i8* }>, align 8 + %vararg_lifetime_bitcast6 = bitcast <{ i32, i8* }>* %vararg_buffer5 to i8* + %vararg_buffer2 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast3 = bitcast <{ i8* }>* %vararg_buffer2 to i8* + %vararg_buffer1 = alloca <{ i8*, i32 }>, align 8 + %vararg_lifetime_bitcast = bitcast <{ i8*, i32 }>* %vararg_buffer1 to i8* + %muffin = alloca [117 x i8], align 1 + %cupcake = alloca [119 x i8], align 1 + %tmp = getelementptr [117 x i8]* %muffin, i32 0, i32 0 + call void @llvm.lifetime.start(i64 117, i8* %tmp) #0 + %tmp1 = load i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast) + %vararg_ptr = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 0 + store i8* %tmp1, i8** %vararg_ptr, align 4 + %vararg_ptr1 = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 1 + store i32 %argc, i32* %vararg_ptr1, align 4 + %call = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i8*, i32 }>*)*)(i8* %tmp, i8* getelementptr inbounds ([26 x i8]* @.str, i32 0, i32 0), <{ i8*, i32 }>* %vararg_buffer1) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast) + %tmp2 = load %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4 + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast3) + %vararg_ptr4 = getelementptr <{ i8* }>* %vararg_buffer2, i32 0, i32 0 + store i8* %tmp, i8** %vararg_ptr4, align 4 + %call2 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp2, i8* getelementptr inbounds ([33 x i8]* @.str1, i32 0, i32 0), <{ i8* }>* %vararg_buffer2) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast3) + call void @llvm.lifetime.end(i64 117, i8* %tmp) #0 + %tmp3 = getelementptr [119 x i8]* %cupcake, i32 0, i32 0 + call void @llvm.lifetime.start(i64 119, i8* %tmp3) #0 + %tmp4 = load i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast6) + %vararg_ptr7 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 0 + store i32 %argc, i32* %vararg_ptr7, align 4 + %vararg_ptr8 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 1 + store i8* %tmp4, i8** %vararg_ptr8, align 4 + %call5 = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i32, i8* }>*)*)(i8* %tmp3, i8* getelementptr inbounds ([38 x i8]* @.str2, i32 
0, i32 0), <{ i32, i8* }>* %vararg_buffer5) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast6) + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast10) + %vararg_ptr11 = getelementptr <{ i8* }>* %vararg_buffer0, i32 0, i32 0 + store i8* %tmp3, i8** %vararg_ptr11, align 4 + %call7 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp2, i8* getelementptr inbounds ([43 x i8]* @.str3, i32 0, i32 0), <{ i8* }>* %vararg_buffer0) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast10) + call void @llvm.lifetime.end(i64 119, i8* %tmp3) #0 + ret void +} + +; CHECK: function _bar($argc,$argv) { +; CHECK-NOT: cupcake +; CHECK: STACKTOP = STACKTOP + 128|0; +; CHECK-NEXT: vararg_buffer0 = +; CHECK-NEXT: $muffin = +; CHECK-NOT: cupcake +; CHECK: } + +; Function Attrs: nounwind +define void @bar(i32 %argc, i8** %argv) #0 { +entry: + %vararg_buffer0 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast10 = bitcast <{ i8* }>* %vararg_buffer0 to i8* + %vararg_buffer5 = alloca <{ i32, i8* }>, align 8 + %vararg_lifetime_bitcast6 = bitcast <{ i32, i8* }>* %vararg_buffer5 to i8* + %vararg_buffer2 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast3 = bitcast <{ i8* }>* %vararg_buffer2 to i8* + %vararg_buffer1 = alloca <{ i8*, i32 }>, align 8 + %vararg_lifetime_bitcast = bitcast <{ i8*, i32 }>* %vararg_buffer1 to i8* + %muffin = alloca [117 x i8], align 1 + %cupcake = alloca [119 x i8], align 1 + %tmp = getelementptr [117 x i8]* %muffin, i32 0, i32 0 + call void @llvm.lifetime.start(i64 117, i8* %tmp) #0 + %cmp = icmp eq i32 %argc, 39 + br i1 %cmp, label %if.end.thread, label %if.end + +if.end.thread: ; preds = %entry + call void @llvm.lifetime.end(i64 117, i8* %tmp) #0 + %tmp1 = getelementptr [119 x i8]* %cupcake, i32 0, i32 0 + call void @llvm.lifetime.start(i64 119, i8* %tmp1) #0 + %.pre = load %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4 + br label %if.then4 + +if.end: ; preds = %entry + %tmp2 = load i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast) + %vararg_ptr = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 0 + store i8* %tmp2, i8** %vararg_ptr, align 4 + %vararg_ptr1 = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 1 + store i32 %argc, i32* %vararg_ptr1, align 4 + %call = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i8*, i32 }>*)*)(i8* %tmp, i8* getelementptr inbounds ([26 x i8]* @.str, i32 0, i32 0), <{ i8*, i32 }>* %vararg_buffer1) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast) + %tmp3 = load %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4 + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast3) + %vararg_ptr4 = getelementptr <{ i8* }>* %vararg_buffer2, i32 0, i32 0 + store i8* %tmp, i8** %vararg_ptr4, align 4 + %call2 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp3, i8* getelementptr inbounds ([33 x i8]* @.str1, i32 0, i32 0), <{ i8* }>* %vararg_buffer2) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast3) + call void @llvm.lifetime.end(i64 117, i8* %tmp) #0 + %tmp4 = getelementptr [119 x i8]* %cupcake, i32 0, i32 0 + call void @llvm.lifetime.start(i64 119, i8* %tmp4) #0 + %cmp3 = icmp eq i32 %argc, 45 + br i1 %cmp3, label %if.end10, label %if.then4 + +if.then4: ; preds = %if.end, 
%if.end.thread + %tmp5 = phi %struct._IO_FILE* [ %.pre, %if.end.thread ], [ %tmp3, %if.end ] + %tmp6 = phi i8* [ %tmp1, %if.end.thread ], [ %tmp4, %if.end ] + %tmp7 = load i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast6) + %vararg_ptr7 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 0 + store i32 %argc, i32* %vararg_ptr7, align 4 + %vararg_ptr8 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 1 + store i8* %tmp7, i8** %vararg_ptr8, align 4 + %call7 = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i32, i8* }>*)*)(i8* %tmp6, i8* getelementptr inbounds ([38 x i8]* @.str2, i32 0, i32 0), <{ i32, i8* }>* %vararg_buffer5) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast6) + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast10) + %vararg_ptr11 = getelementptr <{ i8* }>* %vararg_buffer0, i32 0, i32 0 + store i8* %tmp6, i8** %vararg_ptr11, align 4 + %call9 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp5, i8* getelementptr inbounds ([43 x i8]* @.str3, i32 0, i32 0), <{ i8* }>* %vararg_buffer0) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast10) + br label %if.end10 + +if.end10: ; preds = %if.then4, %if.end + %tmp8 = phi i8* [ %tmp4, %if.end ], [ %tmp6, %if.then4 ] + call void @llvm.lifetime.end(i64 119, i8* %tmp8) #0 + ret void +} + +; Function Attrs: nounwind +declare i32 @sprintf(i8*, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @fprintf(%struct._IO_FILE*, i8*, i8*) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +attributes #0 = { nounwind } diff --git a/test/CodeGen/JS/asm.ll b/test/CodeGen/JS/asm.ll new file mode 100644 index 00000000000..41a30431da9 --- /dev/null +++ b/test/CodeGen/JS/asm.ll @@ -0,0 +1,13 @@ +; RUN: not llc -march=js < %s + +; Inline asm isn't supported (yet?). llc should report an error when it +; encounters inline asm. +; +; We could support the special case of an empty inline asm string without much +; work, but code that uses such things most likely isn't portable anyway, and +; there are usually much better alternatives. + +define void @foo() { + call void asm "", ""() + ret void +} diff --git a/test/CodeGen/JS/dead-prototypes.ll b/test/CodeGen/JS/dead-prototypes.ll new file mode 100644 index 00000000000..2a723ed61e5 --- /dev/null +++ b/test/CodeGen/JS/dead-prototypes.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=js < %s | not grep printf + +; llc shouldn't emit any code or bookkeeping for unused declarations. + +define void @foo() { + ret void +} + +declare i32 @printf(i8* nocapture, ...) 
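The allocamanager.ll test earlier in this patch exercises the core AllocaManager idea: static allocas whose llvm.lifetime.start/end ranges never overlap may share one frame slot. A minimal sketch of the pattern (a hypothetical test in the same style, not part of the patch; @use and @disjoint are made-up names):

; Two 64-byte allocas with disjoint lifetime ranges; a frame layout
; that assigns both to the same 64-byte slot is legal here.
declare void @use(i8*)
declare void @llvm.lifetime.start(i64, i8* nocapture)
declare void @llvm.lifetime.end(i64, i8* nocapture)

define void @disjoint() {
  %a = alloca [64 x i8], align 4
  %b = alloca [64 x i8], align 4
  %pa = getelementptr [64 x i8]* %a, i32 0, i32 0
  %pb = getelementptr [64 x i8]* %b, i32 0, i32 0
  call void @llvm.lifetime.start(i64 64, i8* %pa)
  call void @use(i8* %pa)
  call void @llvm.lifetime.end(i64 64, i8* %pa)
  call void @llvm.lifetime.start(i64 64, i8* %pb)
  call void @use(i8* %pb)
  call void @llvm.lifetime.end(i64 64, i8* %pb)
  ret void
}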
diff --git a/test/CodeGen/JS/expand-i64.ll b/test/CodeGen/JS/expand-i64.ll index fd468fc6d11..a8a1875fa48 100644 --- a/test/CodeGen/JS/expand-i64.ll +++ b/test/CodeGen/JS/expand-i64.ll @@ -1,6 +1,6 @@ ; RUN: opt -S -expand-illegal-ints < %s | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" ; CHECK: define i32 @add(i32, i32, i32, i32) { ; CHECK: %5 = call i32 @i64Add(i32 %0, i32 %1, i32 %2, i32 %3) diff --git a/test/CodeGen/JS/expect-intrinsics.ll b/test/CodeGen/JS/expect-intrinsics.ll new file mode 100644 index 00000000000..b061f52967b --- /dev/null +++ b/test/CodeGen/JS/expect-intrinsics.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=js < %s | FileCheck %s + +; Handle the llvm.expect intrinsic. + +; CHECK: $expval = $x; +; CHECK: $tobool = ($expval|0)!=(0); +define void @foo(i32 %x) { +entry: + %expval = call i32 @llvm.expect.i32(i32 %x, i32 0) + %tobool = icmp ne i32 %expval, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: + call void @callee() + br label %if.end + +if.end: + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.expect.i32(i32, i32) #0 + +declare void @callee() + +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/JS/ffis-f32.ll b/test/CodeGen/JS/ffis-f32.ll new file mode 100644 index 00000000000..39d3c65b6ce --- /dev/null +++ b/test/CodeGen/JS/ffis-f32.ll @@ -0,0 +1,78 @@ +; RUN: llc -emscripten-precise-f32 -march=js < %s | FileCheck %s + +; Use proper types for FFI calls, with float32 + +; CHECK: (+Math_sqrt(+1)); +; CHECK-NEXT: (Math_fround(Math_sqrt(Math_fround(+1)))); +; CHECK-NEXT: (+Math_sqrt((+$d))); +; CHECK-NEXT: (Math_fround(Math_sqrt((Math_fround($f))))); +; CHECK-NEXT: (+Math_ceil(+1)); +; CHECK-NEXT: (Math_fround(Math_ceil(Math_fround(+1)))); +; CHECK-NEXT: (+Math_floor(+1)); +; CHECK-NEXT: (Math_fround(Math_floor(Math_fround(+1)))); +; CHECK-NEXT: (+_min(+1,+1)); +; CHECK-NEXT: (Math_fround(+(_fmin(+1,+1)))); +; CHECK-NEXT: (+_max(+1,+1)); +; CHECK-NEXT: (Math_fround(+(_fmax(+1,+1)))); +; CHECK-NEXT: (+Math_abs(+1)); +; CHECK-NEXT: (Math_fround(+(_absf(+1)))); +; CHECK-NEXT: (+Math_sin(+1)); +; CHECK-NEXT: (Math_fround(+(Math_sin(+1)))); +define void @foo(i32 %x) { +entry: + %f = fadd float 1.0, 2.0 + %d = fadd double 1.0, 2.0 + + %sqrtd = call double @sqrt(double 1.0) + %sqrtf = call float @sqrtf(float 1.0) + %sqrtdv = call double @sqrt(double %d) ; check vars too + %sqrtfv = call float @sqrtf(float %f) + + %ceild = call double @ceil(double 1.0) + %ceilf = call float @ceilf(float 1.0) + + %floord = call double @floor(double 1.0) + %floorf = call float @floorf(float 1.0) + + ; these could be optimized in theory + + %mind = call double @min(double 1.0, double 1.0) + %minf = call float @fmin(float 1.0, float 1.0) + + %maxd = call double @max(double 1.0, double 1.0) + %maxf = call float @fmax(float 1.0, float 1.0) + + %absd = call double @abs(double 1.0) + %absf = call float @absf(float 1.0) + + ; sin is NOT optimizable with floats + + %sind = call double @sin(double 1.0) + %sinf = call float @sinf(float 1.0) + + ret void +} + +declare double @sqrt(double %x) +declare float @sqrtf(float %x) + +declare double @ceil(double %x) +declare float @ceilf(float %x) + +declare double @floor(double %x) +declare float @floorf(float %x) + +declare double @min(double %x, double %y) +declare float @fmin(float %x, float %y) +
+declare double @max(double %x, double %y) +declare float @fmax(float %x, float %y) + +declare double @abs(double %x) +declare float @absf(float %x) + +declare double @sin(double %x) +declare float @sinf(float %x) + +attributes #0 = { nounwind readnone } + diff --git a/test/CodeGen/JS/ffis.ll b/test/CodeGen/JS/ffis.ll new file mode 100644 index 00000000000..9e3de7bad1f --- /dev/null +++ b/test/CodeGen/JS/ffis.ll @@ -0,0 +1,78 @@ +; RUN: llc -march=js < %s | FileCheck %s + +; Use proper types for FFI calls, no float32 + +; CHECK: (+Math_sqrt(+1)); +; CHECK-NEXT: (+Math_sqrt(+1)); +; CHECK-NEXT: (+Math_sqrt((+$d))); +; CHECK-NEXT: (+Math_sqrt((+$f))); +; CHECK-NEXT: (+Math_ceil(+1)); +; CHECK-NEXT: (+Math_ceil(+1)); +; CHECK-NEXT: (+Math_floor(+1)); +; CHECK-NEXT: (+Math_floor(+1)); +; CHECK-NEXT: (+_min(+1,+1)); +; CHECK-NEXT: (+_fmin(+1,+1)); +; CHECK-NEXT: (+_max(+1,+1)); +; CHECK-NEXT: (+_fmax(+1,+1)); +; CHECK-NEXT: (+Math_abs(+1)); +; CHECK-NEXT: (+_absf(+1)); +; CHECK-NEXT: (+Math_sin(+1)); +; CHECK-NEXT: (+Math_sin(+1)); +define void @foo(i32 %x) { +entry: + %f = fadd float 1.0, 2.0 + %d = fadd double 1.0, 2.0 + + %sqrtd = call double @sqrt(double 1.0) + %sqrtf = call float @sqrtf(float 1.0) + %sqrtdv = call double @sqrt(double %d) ; check vars too + %sqrtfv = call float @sqrtf(float %f) + + %ceild = call double @ceil(double 1.0) + %ceilf = call float @ceilf(float 1.0) + + %floord = call double @floor(double 1.0) + %floorf = call float @floorf(float 1.0) + + ; these could be optimized in theory + + %mind = call double @min(double 1.0, double 1.0) + %minf = call float @fmin(float 1.0, float 1.0) + + %maxd = call double @max(double 1.0, double 1.0) + %maxf = call float @fmax(float 1.0, float 1.0) + + %absd = call double @abs(double 1.0) + %absf = call float @absf(float 1.0) + + ; sin is NOT optimizable with floats + + %sind = call double @sin(double 1.0) + %sinf = call float @sinf(float 1.0) + + ret void +} + +declare double @sqrt(double %x) +declare float @sqrtf(float %x) + +declare double @ceil(double %x) +declare float @ceilf(float %x) + +declare double @floor(double %x) +declare float @floorf(float %x) + +declare double @min(double %x, double %y) +declare float @fmin(float %x, float %y) + +declare double @max(double %x, double %y) +declare float @fmax(float %x, float %y) + +declare double @abs(double %x) +declare float @absf(float %x) + +declare double @sin(double %x) +declare float @sinf(float %x) + +attributes #0 = { nounwind readnone } + diff --git a/test/CodeGen/JS/getelementptr.ll b/test/CodeGen/JS/getelementptr.ll index 22919097785..2dbb868b94e 100644 --- a/test/CodeGen/JS/getelementptr.ll +++ b/test/CodeGen/JS/getelementptr.ll @@ -2,7 +2,7 @@ ; Test simple getelementptr codegen. -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" ; CHECK: function _getelementptr([[VAL_P:\$[a-z_]+]]) { ; CHECK: [[GEP:\$[a-z_]+]] = (([[GEPINT:\$[a-z_]+]]) + 588)|0; diff --git a/test/CodeGen/JS/global-alias.ll b/test/CodeGen/JS/global-alias.ll new file mode 100644 index 00000000000..b6efc0e7e7a --- /dev/null +++ b/test/CodeGen/JS/global-alias.ll @@ -0,0 +1,56 @@ +; RUN: llc -march=js < %s | FileCheck %s + +; Handle global aliases of various kinds.
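+; Whether an alias is internal, weak, or ordinary, it should fold to the
+; address of the aliasee: the [[PRI]] and [[PUB]] capture groups below
+; require every function in each group to return one shared address.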
+ +@pri = internal global [60 x i8] zeroinitializer +@pub = global [60 x i8] zeroinitializer + +@pri_int = alias internal [60 x i8]* @pri +@pri_wea = alias weak [60 x i8]* @pri +@pri_nor = alias [60 x i8]* @pri + +@pub_int = alias internal [60 x i8]* @pub +@pub_wea = alias weak [60 x i8]* @pub +@pub_nor = alias [60 x i8]* @pub + +; CHECK: test0( +; CHECK: return ([[PRI:[0-9]+]]|0); +define [60 x i8]* @test0() { + ret [60 x i8]* @pri +} +; CHECK: test1( +; CHECK: return ([[PRI]]|0); +define [60 x i8]* @test1() { + ret [60 x i8]* @pri_int +} +; CHECK: test2( +; CHECK: return ([[PRI]]|0); +define [60 x i8]* @test2() { + ret [60 x i8]* @pri_wea +} +; CHECK: test3( +; CHECK: return ([[PRI]]|0); +define [60 x i8]* @test3() { + ret [60 x i8]* @pri_nor +} + +; CHECK: test4( +; CHECK: return ([[PUB:[0-9]+]]|0); +define [60 x i8]* @test4() { + ret [60 x i8]* @pub +} +; CHECK: test5( +; CHECK: return ([[PUB]]|0); +define [60 x i8]* @test5() { + ret [60 x i8]* @pub_int +} +; CHECK: test6( +; CHECK: return ([[PUB]]|0); +define [60 x i8]* @test6() { + ret [60 x i8]* @pub_wea +} +; CHECK: test7( +; CHECK: return ([[PUB]]|0); +define [60 x i8]* @test7() { + ret [60 x i8]* @pub_nor +} diff --git a/test/CodeGen/JS/invariant-intrinsics.ll b/test/CodeGen/JS/invariant-intrinsics.ll new file mode 100644 index 00000000000..dc156a9ffbf --- /dev/null +++ b/test/CodeGen/JS/invariant-intrinsics.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=js < %s | not grep invariant + +; llc currently emits no code or bookkeeping for invariant intrinsic calls +; or declarations. + +declare void @bar(i8*) + +define void @foo() { + %p = alloca i8 + %i = call {}* @llvm.invariant.start(i64 1, i8* %p) + call void @bar(i8* %p) + call void @llvm.invariant.end({}* %i, i64 1, i8* %p) + ret void +} + +declare {}* @llvm.invariant.start(i64, i8* nocapture) +declare void @llvm.invariant.end({}*, i64, i8* nocapture) diff --git a/test/CodeGen/JS/lifetime-intrinsics.ll b/test/CodeGen/JS/lifetime-intrinsics.ll new file mode 100644 index 00000000000..e3d8560e301 --- /dev/null +++ b/test/CodeGen/JS/lifetime-intrinsics.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=js < %s | not grep lifetime + +; llc currently emits no code or bookkeeping for lifetime intrinsic calls +; or declarations. + +declare void @bar(i8*) + +define void @foo() { + %p = alloca i8 + call void @llvm.lifetime.start(i64 1, i8* %p) + call void @bar(i8* %p) + call void @llvm.lifetime.end(i64 1, i8* %p) + ret void +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) +declare void @llvm.lifetime.end(i64, i8* nocapture) diff --git a/test/CodeGen/JS/mem-intrinsics.ll b/test/CodeGen/JS/mem-intrinsics.ll new file mode 100644 index 00000000000..995d70d5351 --- /dev/null +++ b/test/CodeGen/JS/mem-intrinsics.ll @@ -0,0 +1,53 @@ +; RUN: llc -march=js < %s | FileCheck %s + +; llc should emit small aligned memcpy and memset inline. 
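+; The three cases below illustrate the strategies: a 32-byte copy or set is
+; fully unrolled, a 64-byte one becomes a compact loop, and a 65536-byte one
+; falls back to an actual memcpy/memset call.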
+ +; CHECK: test_unrolled_memcpy +; CHECK: HEAP32[$d+0>>2]=HEAP32[$s+0>>2]|0;HEAP32[$d+4>>2]=HEAP32[$s+4>>2]|0;HEAP32[$d+8>>2]=HEAP32[$s+8>>2]|0;HEAP32[$d+12>>2]=HEAP32[$s+12>>2]|0;HEAP32[$d+16>>2]=HEAP32[$s+16>>2]|0;HEAP32[$d+20>>2]=HEAP32[$s+20>>2]|0;HEAP32[$d+24>>2]=HEAP32[$s+24>>2]|0;HEAP32[$d+28>>2]=HEAP32[$s+28>>2]|0; +define void @test_unrolled_memcpy(i8* %d, i8* %s) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 32, i32 4, i1 false) + ret void +} + +; CHECK: test_loop_memcpy +; CHECK: dest=$d+0|0; src=$s+0|0; stop=dest+64|0; do { HEAP32[dest>>2]=HEAP32[src>>2]|0; dest=dest+4|0; src=src+4|0; } while ((dest|0) < (stop|0)) +define void @test_loop_memcpy(i8* %d, i8* %s) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 64, i32 4, i1 false) + ret void +} + +; CHECK: test_call_memcpy +; CHECK: memcpy(($d|0),($s|0),65536) +define void @test_call_memcpy(i8* %d, i8* %s) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 65536, i32 4, i1 false) + ret void +} + +; CHECK: test_unrolled_memset +; CHECK: HEAP32[$d+0>>2]=0|0;HEAP32[$d+4>>2]=0|0;HEAP32[$d+8>>2]=0|0;HEAP32[$d+12>>2]=0|0;HEAP32[$d+16>>2]=0|0;HEAP32[$d+20>>2]=0|0;HEAP32[$d+24>>2]=0|0;HEAP32[$d+28>>2]=0|0; +define void @test_unrolled_memset(i8* %d, i8* %s) { + call void @llvm.memset.p0i8.i32(i8* %d, i8 0, i32 32, i32 4, i1 false) + ret void +} + +; CHECK: test_loop_memset +; CHECK: dest=$d+0|0; stop=dest+64|0; do { HEAP32[dest>>2]=0|0; dest=dest+4|0; } while ((dest|0) < (stop|0)); +define void @test_loop_memset(i8* %d, i8* %s) { + call void @llvm.memset.p0i8.i32(i8* %d, i8 0, i32 64, i32 4, i1 false) + ret void +} + +; CHECK: test_call_memset +; CHECK: memset(($d|0),0,65536) +define void @test_call_memset(i8* %d, i8* %s) { + call void @llvm.memset.p0i8.i32(i8* %d, i8 0, i32 65536, i32 4, i1 false) + ret void +} + +; Also, don't emit declarations for the intrinsic functions. +; CHECK-NOT: p0i8 + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 + +attributes #0 = { nounwind } diff --git a/test/CodeGen/JS/phi.ll b/test/CodeGen/JS/phi.ll new file mode 100644 index 00000000000..517f73cba39 --- /dev/null +++ b/test/CodeGen/JS/phi.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=js < %s | FileCheck %s + +; Phi lowering should check for dependency cycles, including looking through +; bitcasts, and emit extra copies as needed. 
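+; (A naive lowering like $j = $k; $k = $j; would overwrite $j before its old
+; value is read; the $j$phi/$k$phi temporaries in the CHECK line implement
+; the parallel copy that phi semantics require.)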
+ +; CHECK: while(1) { +; CHECK: $k$phi = $j;$j$phi = $k;$k = $k$phi;$j = $j$phi; +; CHECK: } +define void @foo(float* nocapture %p, i32* %j.init, i32* %k.init) { +entry: + br label %for.body + +for.body: + %j = phi i32* [ %j.init, %entry ], [ %k.cast, %more ] + %k = phi i32* [ %k.init, %entry ], [ %j.cast, %more ] + br label %more + +more: + %j.cast = bitcast i32* %j to i32* + %k.cast = bitcast i32* %k to i32* + br label %for.body +} diff --git a/test/Transforms/GlobalOpt/array-elem-refs.ll b/test/Transforms/GlobalOpt/array-elem-refs.ll new file mode 100644 index 00000000000..ec472b0e99f --- /dev/null +++ b/test/Transforms/GlobalOpt/array-elem-refs.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -S -globalopt | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.S = type { i8, i8 } + +@c = internal global i8** bitcast (i8* getelementptr (i8* bitcast ([8 x i8*]* @b to i8*), i64 48) to i8**), align 8 +@b = internal global [8 x i8*] [i8* null, i8* null, i8* null, i8* null, i8* null, i8* null, i8* getelementptr inbounds (%struct.S* @a, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S* @a, i32 0, i32 0), i64 1)], align 16 +@a = internal global %struct.S zeroinitializer, align 1 + +; Function Attrs: nounwind uwtable +define signext i8 @foo() #0 { +entry: + %0 = load i8*** @c, align 8 + %1 = load i8** %0, align 8 + %2 = load i8* %1, align 1 + ret i8 %2 + +; CHECK-LABEL: @foo +; CHECK: ret i8 0 +} + +; Function Attrs: nounwind uwtable +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + ret i32 0 +} + +attributes #0 = { nounwind uwtable } + diff --git a/test/Transforms/NaCl/promote-integers.ll b/test/Transforms/NaCl/promote-integers.ll index 7c010be32bf..baab0822cd0 100644 --- a/test/Transforms/NaCl/promote-integers.ll +++ b/test/Transforms/NaCl/promote-integers.ll @@ -228,6 +228,62 @@ define void @ashr1(i16 %a) { ret void } +; CHECK: @udiv1 +define void @udiv1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = zext i32 %a to i64 + %a33 = zext i32 %a to i33 +; CHECK-NEXT: %b33 = zext i32 %b to i64 + %b33 = zext i32 %b to i33 +; CHECK-NEXT: %a33.clear = and i64 %a33, 8589934591 +; CHECK-NEXT: %b33.clear = and i64 %b33, 8589934591 +; CHECK-NEXT: %result = udiv i64 %a33.clear, %b33.clear + %result = udiv i33 %a33, %b33 + ret void +} + +; CHECK: @sdiv1 +define void @sdiv1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = sext i32 %a to i64 + %a33 = sext i32 %a to i33 +; CHECK-NEXT: %b33 = sext i32 %b to i64 +; CHECK-NEXT: %a33.getsign = shl i64 %a33, 31 +; CHECK-NEXT: %a33.signed = ashr i64 %a33.getsign, 31 +; CHECK-NEXT: %b33.getsign = shl i64 %b33, 31 +; CHECK-NEXT: %b33.signed = ashr i64 %b33.getsign, 31 + %b33 = sext i32 %b to i33 +; CHECK-NEXT: %result = sdiv i64 %a33.signed, %b33.signed + %result = sdiv i33 %a33, %b33 + ret void +} + +; CHECK: @urem1 +define void @urem1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = zext i32 %a to i64 + %a33 = zext i32 %a to i33 +; CHECK-NEXT: %b33 = zext i32 %b to i64 +; CHECK-NEXT: %a33.clear = and i64 %a33, 8589934591 +; CHECK-NEXT: %b33.clear = and i64 %b33, 8589934591 + %b33 = zext i32 %b to i33 +; CHECK-NEXT: %result = urem i64 %a33.clear, %b33.clear + %result = urem i33 %a33, %b33 + ret void +} + +; CHECK: @srem1 +define void @srem1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = sext i32 %a to i64 + %a33 = sext i32 %a to i33 +; CHECK-NEXT: %b33 = sext 
i32 %b to i64 +; CHECK-NEXT: %a33.getsign = shl i64 %a33, 31 +; CHECK-NEXT: %a33.signed = ashr i64 %a33.getsign, 31 +; CHECK-NEXT: %b33.getsign = shl i64 %b33, 31 +; CHECK-NEXT: %b33.signed = ashr i64 %b33.getsign, 31 + %b33 = sext i32 %b to i33 +; CHECK-NEXT: %result = srem i64 %a33.signed, %b33.signed + %result = srem i33 %a33, %b33 + ret void +} + ; CHECK: @phi_icmp define void @phi_icmp(i32 %a) { entry: