diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index 0fa32c73589..c4b980c0b57 100644
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -104,7 +104,11 @@ function(add_llvm_symbol_exports target_name export_file)
       COMMENT "Creating export file for ${target_name}")
     set(export_file_linker_flag "${CMAKE_CURRENT_BINARY_DIR}/${native_export_file}")
     if(MSVC)
-      set(export_file_linker_flag "/DEF:${export_file_linker_flag}")
+      # set(export_file_linker_flag "/DEF:${export_file_linker_flag}")
+      # XXX Emscripten: Quote the /DEF: path so that linking still works when the
+      # build directory contains spaces. See https://llvm.org/bugs/show_bug.cgi?id=23313
+      # and https://github.com/kripken/emscripten/issues/3382 .
+      set(export_file_linker_flag "/DEF:\"${export_file_linker_flag}\"")
     endif()
     set_property(TARGET ${target_name} APPEND_STRING PROPERTY
                  LINK_FLAGS " ${export_file_linker_flag}")
diff --git a/emscripten-version.txt b/emscripten-version.txt
index 2226cd13004..7b4009ac531 100644
--- a/emscripten-version.txt
+++ b/emscripten-version.txt
@@ -1,2 +1,2 @@
-1.31.3
+1.32.0
 
diff --git a/lib/Target/JSBackend/CMakeLists.txt b/lib/Target/JSBackend/CMakeLists.txt
index ac04b0a2a71..4d1125c3dd7 100644
--- a/lib/Target/JSBackend/CMakeLists.txt
+++ b/lib/Target/JSBackend/CMakeLists.txt
@@ -1,6 +1,5 @@
 add_llvm_target(JSBackendCodeGen
   AllocaManager.cpp
-  ExpandI64.cpp
   JSBackend.cpp
   JSTargetMachine.cpp
   JSTargetTransformInfo.cpp
diff --git a/lib/Target/JSBackend/CallHandlers.h b/lib/Target/JSBackend/CallHandlers.h
index f2f63c053eb..f77c7f60228 100644
--- a/lib/Target/JSBackend/CallHandlers.h
+++ b/lib/Target/JSBackend/CallHandlers.h
@@ -117,7 +117,7 @@ DEF_CALL_HANDLER(__default__, {
   if (Invoke) {
     // add first param
     if (F) {
-      text += utostr(getFunctionIndex(F)); // convert to function pointer
+      text += relocateFunctionPointer(utostr(getFunctionIndex(F))); // convert to function pointer
     } else {
       text += getValueAsCastStr(CV); // already a function pointer
     }
diff --git a/lib/Target/JSBackend/ExpandI64.cpp b/lib/Target/JSBackend/ExpandI64.cpp
deleted file mode 100644
index a7e1e8c766e..00000000000
--- a/lib/Target/JSBackend/ExpandI64.cpp
+++ /dev/null
@@ -1,1171 +0,0 @@
-//===- ExpandI64.cpp - Expand i64 and wider integer types -------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===------------------------------------------------------------------===//
-//
-// This pass expands and lowers all operations on integers i64 and wider
-// into 32-bit operations that can be handled by JS in a natural way.
-//
-// 64-bit variables become pairs of 2 32-bit variables, for the low and
-// high 32 bit chunks. This happens for both registers and function
-// arguments. Function return values become a return of the low 32 bits
-// and a store of the high 32-bits in tempRet0, a global helper variable.
-// Larger values become more chunks of 32 bits. Currently we require that
-// types be a multiple of 32 bits.
-//
-// Many operations then become simple pairs of operations, for example
-// bitwise AND becomes and AND of each 32-bit chunk. More complex operations
-// like addition are lowered into calls into library support code in
-// Emscripten (i64Add for example).
-//
-//===------------------------------------------------------------------===//
-
-#include "OptPasses.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include <map>
-#include <vector>
-
-#include "llvm/Support/raw_ostream.h"
-
-#ifdef NDEBUG
-#undef assert
-#define assert(x) { if (!(x)) report_fatal_error(#x); }
-#endif
-
-using namespace llvm;
-
-namespace {
-
-  struct PhiBlockChange {
-    BasicBlock *DD, *SwitchBB, *NewBB;
-  };
-
-  typedef SmallVector<Value*, 2> ChunksVec;
-  typedef std::map<Value*, ChunksVec> SplitsMap;
-
-  typedef SmallVector<PHINode *, 8> PHIVec;
-  typedef SmallVector<Instruction *, 8> DeadVec;
-
-  // This is a ModulePass because the pass recreates functions in
-  // order to expand i64 arguments to pairs of i32s.
-  class ExpandI64 : public ModulePass {
-    bool Changed;
-    const DataLayout *DL;
-    Module *TheModule;
-
-    SplitsMap Splits; // old illegal value to new insts
-    PHIVec Phis;
-    std::vector<PhiBlockChange> PhiBlockChanges;
-
-    // If the function has an illegal return or argument, create a legal version
-    void ensureLegalFunc(Function *F);
-
-    // If a function is illegal, remove it
-    void removeIllegalFunc(Function *F);
-
-    // splits an illegal instruction into 32-bit chunks. We do
-    // not yet have the values yet, as they depend on other
-    // splits, so store the parts in Splits, for FinalizeInst.
-    bool splitInst(Instruction *I);
-
-    // For an illegal value, returns the split out chunks
-    // representing the low and high parts, that splitInst
-    // generated.
-    // The value can also be a constant, in which case we just
-    // split it, or a function argument, in which case we
-    // map to the proper legalized new arguments
-    //
-    // @param AllowUnreachable  It is possible for phi nodes
-    //                          to refer to unreachable blocks,
-    //                          which our traversal never
-    //                          reaches; this flag lets us
-    //                          ignore those - otherwise,
-    //                          not finding chunks is fatal
-    ChunksVec getChunks(Value *V, bool AllowUnreachable=false);
-
-    Function *Add, *Sub, *Mul, *SDiv, *UDiv, *SRem, *URem, *LShr, *AShr, *Shl, *GetHigh, *SetHigh, *FtoILow, *FtoIHigh, *DtoILow, *DtoIHigh, *SItoF, *UItoF, *SItoD, *UItoD, *BItoD, *BDtoILow, *BDtoIHigh;
-
-    void ensureFuncs();
-    unsigned getNumChunks(Type *T);
-
-  public:
-    static char ID;
-    ExpandI64() : ModulePass(ID) {
-      initializeExpandI64Pass(*PassRegistry::getPassRegistry());
-
-      Add = Sub = Mul = SDiv = UDiv = SRem = URem = LShr = AShr = Shl = GetHigh = SetHigh = NULL;
-    }
-
-    bool runOnModule(Module &M) override;
-    void getAnalysisUsage(AnalysisUsage &AU) const override;
-  };
-}
-
-char ExpandI64::ID = 0;
-INITIALIZE_PASS(ExpandI64, "expand-illegal-ints",
-                "Expand and lower illegal >i32 operations into 32-bit chunks",
-                false, false)
-
-// Utilities
-
-static Instruction *CopyDebug(Instruction *NewInst, Instruction *Original) {
-  NewInst->setDebugLoc(Original->getDebugLoc());
-  return NewInst;
-}
-
-static bool isIllegal(Type *T) {
-  return T->isIntegerTy() && T->getIntegerBitWidth() > 32;
-}
-
-static FunctionType *getLegalizedFunctionType(FunctionType *FT) {
-  SmallVector<Type*, 0> ArgTypes; // XXX
-  int Num = FT->getNumParams();
-  for (int i = 0; i < Num; i++) {
-    Type *T = FT->getParamType(i);
-    if (!isIllegal(T)) {
-      ArgTypes.push_back(T);
-    } else {
-      Type *i32 = Type::getInt32Ty(FT->getContext());
-      ArgTypes.push_back(i32);
-      ArgTypes.push_back(i32);
-    }
-  }
-  Type *RT = FT->getReturnType();
-  Type *NewRT;
-  if (!isIllegal(RT)) {
-    NewRT = RT;
-  } else {
-    NewRT = Type::getInt32Ty(FT->getContext());
-  }
-  return FunctionType::get(NewRT, ArgTypes, false);
-}
-
-// Implementation of ExpandI64
-
-static bool okToRemainIllegal(Function *F) {
-  StringRef Name = F->getName();
-  if (Name == "llvm.dbg.value") return true;
-
-  // XXX EMSCRIPTEN: These take an i64 immediate argument; since they're not
-  // real instructions, we don't need to legalize them.
-  if (Name == "llvm.lifetime.start") return true;
-  if (Name == "llvm.lifetime.end") return true;
-  if (Name == "llvm.invariant.start") return true;
-  if (Name == "llvm.invariant.end") return true;
-
-  return false;
-}
-
-unsigned ExpandI64::getNumChunks(Type *T) {
-  unsigned Num = DL->getTypeSizeInBits(T);
-  return (Num + 31) / 32;
-}
-
-static bool isLegalFunctionType(FunctionType *FT) {
-  if (isIllegal(FT->getReturnType())) {
-    return false;
-  }
-
-  int Num = FT->getNumParams();
-  for (int i = 0; i < Num; i++) {
-    if (isIllegal(FT->getParamType(i))) {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-static bool isLegalInstruction(const Instruction *I) {
-  if (isIllegal(I->getType())) {
-    return false;
-  }
-
-  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
-    if (isIllegal(I->getOperand(i)->getType())) {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-// We can't use RecreateFunction because we need to handle
-// function and argument attributes specially.
-static Function *RecreateFunctionLegalized(Function *F, FunctionType *NewType) {
-  Function *NewFunc = Function::Create(NewType, F->getLinkage());
-
-  AttributeSet Attrs = F->getAttributes();
-  AttributeSet FnAttrs = Attrs.getFnAttributes();
-
-  // Legalizing the return value is done by storing part of the value into
-  // static storage. Subsequent analysis will see this as a memory access,
-  // so we can no longer claim to be readonly or readnone.
-  if (isIllegal(F->getReturnType())) {
-    FnAttrs = FnAttrs.removeAttribute(F->getContext(),
-                                      AttributeSet::FunctionIndex,
-                                      Attribute::ReadOnly);
-    FnAttrs = FnAttrs.removeAttribute(F->getContext(),
-                                      AttributeSet::FunctionIndex,
-                                      Attribute::ReadNone);
-  }
-
-  NewFunc->addAttributes(AttributeSet::FunctionIndex, FnAttrs);
-  NewFunc->addAttributes(AttributeSet::ReturnIndex, Attrs.getRetAttributes());
-  Function::arg_iterator AI = F->arg_begin();
-
-  // We need to recreate the attribute set, with the right indexes
-  AttributeSet NewAttrs;
-  unsigned NumArgs = F->arg_size();
-  for (unsigned i = 1, j = 1; i < NumArgs+1; i++, j++, AI++) {
-    if (isIllegal(AI->getType())) {
-      j++;
-      continue;
-    }
-    if (!Attrs.hasAttributes(i)) continue;
-    AttributeSet ParamAttrs = Attrs.getParamAttributes(i);
-    AttrBuilder AB;
-    unsigned NumSlots = ParamAttrs.getNumSlots();
-    for (unsigned k = 0; k < NumSlots; k++) {
-      for (AttributeSet::iterator I = ParamAttrs.begin(k), E = ParamAttrs.end(k); I != E; I++) {
-        AB.addAttribute(*I);
-      }
-    }
-    NewFunc->addAttributes(j, AttributeSet::get(F->getContext(), j, AB));
-  }
-
-  F->getParent()->getFunctionList().insert(F, NewFunc);
-  NewFunc->takeName(F);
-  NewFunc->getBasicBlockList().splice(NewFunc->begin(),
-                                      F->getBasicBlockList());
-  F->replaceAllUsesWith(
-      ConstantExpr::getBitCast(NewFunc,
-                               F->getFunctionType()->getPointerTo()));
-  return NewFunc;
-}
-
-void ExpandI64::ensureLegalFunc(Function *F) {
-  if (okToRemainIllegal(F)) return;
-
-  FunctionType *FT = F->getFunctionType();
-  if (isLegalFunctionType(FT)) return;
-
-  Changed = true;
-  Function *NF = RecreateFunctionLegalized(F, getLegalizedFunctionType(FT));
-  std::string Name = NF->getName();
-  if (strncmp(Name.c_str(), "llvm.", 5) == 0) {
-    // this is an intrinsic, and we are changing its signature, which will annoy LLVM, so rename
-    const size_t len = Name.size();
-    SmallString<256> NewName;
-    NewName.resize(len);
-    for (unsigned i = 0; i < len; i++) {
-      NewName[i] = Name[i] != '.' ? Name[i] : '_';
-    }
-    NF->setName(Twine(NewName));
-  }
-
-  // Move and update arguments
-  for (Function::arg_iterator Arg = F->arg_begin(), E = F->arg_end(), NewArg = NF->arg_begin();
-       Arg != E; ++Arg) {
-    if (Arg->getType() == NewArg->getType()) {
-      NewArg->takeName(Arg);
-      Arg->replaceAllUsesWith(NewArg);
-      NewArg++;
-    } else {
-      // This was legalized
-      ChunksVec &Chunks = Splits[&*Arg];
-      int Num = getNumChunks(Arg->getType());
-      assert(Num == 2);
-      for (int i = 0; i < Num; i++) {
-        Chunks.push_back(&*NewArg);
-        if (NewArg->hasName()) Chunks[i]->setName(NewArg->getName() + "$" + utostr(i));
-        NewArg++;
-      }
-    }
-  }
-}
-
-void ExpandI64::removeIllegalFunc(Function *F) {
-  if (okToRemainIllegal(F)) return;
-
-  FunctionType *FT = F->getFunctionType();
-  if (!isLegalFunctionType(FT)) {
-    F->eraseFromParent();
-  }
-}
-
-bool ExpandI64::splitInst(Instruction *I) {
-  Type *i32 = Type::getInt32Ty(I->getContext());
-  Type *i32P = i32->getPointerTo();
-  Type *i64 = Type::getInt64Ty(I->getContext());
-  Value *Zero  = Constant::getNullValue(i32);
-
-  ChunksVec &Chunks = Splits[I];
-
-  switch (I->getOpcode()) {
-    case Instruction::GetElementPtr: {
-      GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
-      SmallVector<Value*, 2> NewOps;
-      for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i) {
-        Value *Op = I->getOperand(i);
-        if (isIllegal(Op->getType())) {
-          // Truncate the operand down to one chunk.
-          NewOps.push_back(getChunks(Op)[0]);
-        } else {
-          NewOps.push_back(Op);
-        }
-      }
-      Value *NewGEP = CopyDebug(GetElementPtrInst::Create(GEP->getPointerOperand(), NewOps, "", GEP), GEP);
-      Chunks.push_back(NewGEP);
-      I->replaceAllUsesWith(NewGEP);
-      break;
-    }
-    case Instruction::SExt: {
-      ChunksVec InputChunks;
-      Value *Op = I->getOperand(0);
-      if (isIllegal(Op->getType())) {
-        InputChunks = getChunks(Op);
-      } else {
-        InputChunks.push_back(Op);
-      }
-
-      for (unsigned i = 0, e = InputChunks.size(); i != e; ++i) {
-        Value *Input = InputChunks[i];
-
-        Type *T = Input->getType();
-        Value *Chunk;
-        if (T->getIntegerBitWidth() < 32) {
-          Chunk = CopyDebug(new SExtInst(Input, i32, "", I), I);
-        } else {
-          assert(T->getIntegerBitWidth() == 32);
-          Chunk = Input;
-        }
-        Chunks.push_back(Chunk);
-      }
-
-      Instruction *Check = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_SLT, Chunks.back(), Zero), I);
-      int Num = getNumChunks(I->getType());
-      for (int i = Chunks.size(); i < Num; i++) {
-        Instruction *High = CopyDebug(new SExtInst(Check, i32, "", I), I);
-        Chunks.push_back(High);
-      }
-      break;
-    }
-    case Instruction::PtrToInt:
-    case Instruction::ZExt: {
-      Value *Op = I->getOperand(0);
-      ChunksVec InputChunks;
-      if (I->getOpcode() == Instruction::PtrToInt) {
-        InputChunks.push_back(CopyDebug(new PtrToIntInst(Op, i32, "", I), I));
-      } else if (isIllegal(Op->getType())) {
-        InputChunks = getChunks(Op);
-      } else {
-        InputChunks.push_back(Op);
-      }
-
-      for (unsigned i = 0, e = InputChunks.size(); i != e; ++i) {
-        Value *Input = InputChunks[i];
-        Type *T = Input->getType();
-
-        Value *Chunk;
-        if (T->getIntegerBitWidth() < 32) {
-          Chunk = CopyDebug(new ZExtInst(Input, i32, "", I), I);
-        } else {
-          assert(T->getIntegerBitWidth() == 32);
-          Chunk = Input;
-        }
-        Chunks.push_back(Chunk);
-      }
-
-      int Num = getNumChunks(I->getType());
-      for (int i = Chunks.size(); i < Num; i++) {
-        Chunks.push_back(Zero);
-      }
-      break;
-    }
-    case Instruction::IntToPtr:
-    case Instruction::Trunc: {
-      unsigned Num = getNumChunks(I->getType());
-      unsigned NumBits = DL->getTypeSizeInBits(I->getType());
-      ChunksVec InputChunks = getChunks(I->getOperand(0));
-      for (unsigned i = 0; i < Num; i++) {
-        Value *Input = InputChunks[i];
-
-        Value *Chunk;
-        if (NumBits < 32) {
-          Chunk = CopyDebug(new TruncInst(Input, IntegerType::get(I->getContext(), NumBits), "", I), I);
-          NumBits = 0;
-        } else {
-          Chunk = Input;
-          NumBits -= 32;
-        }
-        if (I->getOpcode() == Instruction::IntToPtr) {
-          assert(i == 0);
-          Chunk = CopyDebug(new IntToPtrInst(Chunk, I->getType(), "", I), I);
-        }
-        Chunks.push_back(Chunk);
-      }
-      if (!isIllegal(I->getType())) {
-        assert(Chunks.size() == 1);
-        I->replaceAllUsesWith(Chunks[0]);
-      }
-      break;
-    }
-    case Instruction::Load: {
-      LoadInst *LI = cast<LoadInst>(I);
-      Instruction *AI = CopyDebug(new PtrToIntInst(LI->getPointerOperand(), i32, "", I), I);
-      int Num = getNumChunks(I->getType());
-      for (int i = 0; i < Num; i++) {
-        Instruction *Add = i == 0 ? AI : CopyDebug(BinaryOperator::Create(Instruction::Add, AI, ConstantInt::get(i32, 4*i), "", I), I);
-        Instruction *Ptr = CopyDebug(new IntToPtrInst(Add, i32P, "", I), I);
-        LoadInst *Chunk = new LoadInst(Ptr, "", I); CopyDebug(Chunk, I);
-        Chunk->setAlignment(MinAlign(LI->getAlignment() == 0 ?
-                                         DL->getABITypeAlignment(LI->getType()) :
-                                         LI->getAlignment(),
-                                     4*i));
-        Chunk->setVolatile(LI->isVolatile());
-        Chunk->setOrdering(LI->getOrdering());
-        Chunk->setSynchScope(LI->getSynchScope());
-        Chunks.push_back(Chunk);
-      }
-      break;
-    }
-    case Instruction::Store: {
-      StoreInst *SI = cast<StoreInst>(I);
-      Instruction *AI = CopyDebug(new PtrToIntInst(SI->getPointerOperand(), i32, "", I), I);
-      ChunksVec InputChunks = getChunks(SI->getValueOperand());
-      int Num = InputChunks.size();
-      for (int i = 0; i < Num; i++) {
-        Instruction *Add = i == 0 ? AI : CopyDebug(BinaryOperator::Create(Instruction::Add, AI, ConstantInt::get(i32, 4*i), "", I), I);
-        Instruction *Ptr = CopyDebug(new IntToPtrInst(Add, i32P, "", I), I);
-        StoreInst *Chunk = new StoreInst(InputChunks[i], Ptr, I);
-        Chunk->setAlignment(MinAlign(SI->getAlignment() == 0 ?
-                                         DL->getABITypeAlignment(SI->getValueOperand()->getType()) :
-                                         SI->getAlignment(),
-                                     4*i));
-        Chunk->setVolatile(SI->isVolatile());
-        Chunk->setOrdering(SI->getOrdering());
-        Chunk->setSynchScope(SI->getSynchScope());
-        CopyDebug(Chunk, I);
-      }
-      break;
-    }
-    case Instruction::Ret: {
-      assert(I->getOperand(0)->getType() == i64);
-      ChunksVec InputChunks = getChunks(I->getOperand(0));
-      ensureFuncs();
-      SmallVector<Value *, 1> Args;
-      Args.push_back(InputChunks[1]);
-      CopyDebug(CallInst::Create(SetHigh, Args, "", I), I);
-      CopyDebug(ReturnInst::Create(I->getContext(), InputChunks[0], I), I);
-      break;
-    }
-    case Instruction::Add:
-    case Instruction::Sub:
-    case Instruction::Mul:
-    case Instruction::SDiv:
-    case Instruction::UDiv:
-    case Instruction::SRem:
-    case Instruction::URem:
-    case Instruction::LShr:
-    case Instruction::AShr:
-    case Instruction::Shl: {
-      ChunksVec LeftChunks = getChunks(I->getOperand(0));
-      ChunksVec RightChunks = getChunks(I->getOperand(1));
-      unsigned Num = getNumChunks(I->getType());
-      if (Num == 2) {
-        ensureFuncs();
-        Value *Low = NULL, *High = NULL;
-        Function *F = NULL;
-        switch (I->getOpcode()) {
-          case Instruction::Add:  F = Add;  break;
-          case Instruction::Sub:  F = Sub;  break;
-          case Instruction::Mul:  F = Mul;  break;
-          case Instruction::SDiv: F = SDiv; break;
-          case Instruction::UDiv: F = UDiv; break;
-          case Instruction::SRem: F = SRem; break;
-          case Instruction::URem: F = URem; break;
-          case Instruction::AShr: F = AShr; break;
-          case Instruction::LShr: {
-            if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
-              unsigned Shifts = CI->getZExtValue();
-              if (Shifts == 32) {
-                Low = LeftChunks[1];
-                High = Zero;
-                break;
-              }
-            }
-            F = LShr;
-            break;
-          }
-          case Instruction::Shl: {
-            if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
-              const APInt &Shifts = CI->getValue();
-              if (Shifts == 32) {
-                Low = Zero;
-                High = LeftChunks[0];
-                break;
-              }
-            }
-            F = Shl;
-            break;
-          }
-          default: assert(0);
-        }
-        if (F) {
-          // use a library call, no special optimization was found
-          SmallVector<Value *, 4> Args;
-          Args.push_back(LeftChunks[0]);
-          Args.push_back(LeftChunks[1]);
-          Args.push_back(RightChunks[0]);
-          Args.push_back(RightChunks[1]);
-          Low = CopyDebug(CallInst::Create(F, Args, "", I), I);
-          High = CopyDebug(CallInst::Create(GetHigh, "", I), I);
-        }
-        Chunks.push_back(Low);
-        Chunks.push_back(High);
-      } else {
-        // more than 64 bits. handle simple shifts for lshr and shl
-        assert(I->getOpcode() == Instruction::LShr || I->getOpcode() == Instruction::AShr || I->getOpcode() == Instruction::Shl);
-        ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
-        unsigned Shifts = CI->getZExtValue();
-        unsigned Fraction = Shifts % 32;
-        Constant *Frac = ConstantInt::get(i32, Fraction);
-        Constant *Comp = ConstantInt::get(i32, 32 - Fraction);
-        Instruction::BinaryOps Opcode, Reverse;
-        unsigned ShiftChunks, Dir;
-        Value *TopFiller = Zero;
-        if (I->getOpcode() == Instruction::Shl) {
-          Opcode = Instruction::Shl;
-          Reverse = Instruction::LShr;
-          ShiftChunks = -(Shifts/32);
-          Dir = -1;
-        } else {
-          Opcode = Instruction::LShr;
-          Reverse = Instruction::Shl;
-          ShiftChunks = Shifts/32;
-          Dir = 1;
-          if (I->getOpcode() == Instruction::AShr) {
-            Value *Cond = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_SLT, LeftChunks[LeftChunks.size()-1], Zero), I);
-            TopFiller = CopyDebug(SelectInst::Create(Cond, ConstantInt::get(i32, -1), Zero, "", I), I);
-          }
-        }
-        for (unsigned i = 0; i < Num; i++) {
-          Value *L;
-          if (i + ShiftChunks < LeftChunks.size()) {
-            L = LeftChunks[i + ShiftChunks];
-          } else {
-            L = Zero;
-          }
-
-          Value *H;
-          if (i + ShiftChunks + Dir < LeftChunks.size()) {
-            H = LeftChunks[i + ShiftChunks + Dir];
-          } else {
-            H = TopFiller;
-          }
-
-          // shifted the fractional amount
-          if (Frac != Zero && L != Zero) {
-            if (Fraction == 32) {
-              L = Zero;
-            } else {
-              L = CopyDebug(BinaryOperator::Create(Opcode, L, Frac, "", I), I);
-            }
-          }
-          // shifted the complement-fractional amount to the other side
-          if (Comp != Zero && H != Zero) {
-            if (Fraction == 0) {
-              H = TopFiller;
-            } else {
-              H = CopyDebug(BinaryOperator::Create(Reverse, H, Comp, "", I), I);
-            }
-          }
-
-          // Or the parts together. Since we may have zero, try to fold it away.
-          if (Value *V = SimplifyBinOp(Instruction::Or, L, H, DL)) {
-            Chunks.push_back(V);
-          } else {
-            Chunks.push_back(CopyDebug(BinaryOperator::Create(Instruction::Or, L, H, "", I), I));
-          }
-        }
-      }
-      break;
-    }
-    case Instruction::ICmp: {
-      ICmpInst *CE = cast<ICmpInst>(I);
-      ICmpInst::Predicate Pred = CE->getPredicate();
-      ChunksVec LeftChunks = getChunks(I->getOperand(0));
-      ChunksVec RightChunks = getChunks(I->getOperand(1));
-      switch (Pred) {
-        case ICmpInst::ICMP_EQ:
-        case ICmpInst::ICMP_NE: {
-          ICmpInst::Predicate PartPred; // the predicate to use on each of the parts
-          llvm::Instruction::BinaryOps CombineOp; // the predicate to use to combine the subcomparisons
-          int Num = LeftChunks.size();
-          if (Pred == ICmpInst::ICMP_EQ) {
-            PartPred = ICmpInst::ICMP_EQ;
-            CombineOp = Instruction::And;
-          } else {
-            PartPred = ICmpInst::ICMP_NE;
-            CombineOp = Instruction::Or;
-          }
-          // first combine 0 and 1. then combine that with 2, etc.
-          Value *Combined = NULL;
-          for (int i = 0; i < Num; i++) {
-            Value *Cmp = CopyDebug(new ICmpInst(I, PartPred, LeftChunks[i], RightChunks[i]), I);
-            Combined = !Combined ? Cmp : CopyDebug(BinaryOperator::Create(CombineOp, Combined, Cmp, "", I), I);
-          }
-          I->replaceAllUsesWith(Combined);
-          break;
-        }
-        case ICmpInst::ICMP_ULT:
-        case ICmpInst::ICMP_SLT:
-        case ICmpInst::ICMP_UGT:
-        case ICmpInst::ICMP_SGT:
-        case ICmpInst::ICMP_ULE:
-        case ICmpInst::ICMP_SLE:
-        case ICmpInst::ICMP_UGE:
-        case ICmpInst::ICMP_SGE: {
-          if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
-            if (CI->getZExtValue() == 0 && Pred == ICmpInst::ICMP_SLT) {
-              // strict < 0 is easy to do, even on non-i64, just the sign bit matters
-              Instruction *NewInst = new ICmpInst(I, ICmpInst::ICMP_SLT, LeftChunks[LeftChunks.size()-1], Zero);
-              CopyDebug(NewInst, I);
-              I->replaceAllUsesWith(NewInst);
-              return true;
-            }
-          }
-          Type *T = I->getOperand(0)->getType();
-          assert(T->isIntegerTy() && T->getIntegerBitWidth() % 32 == 0);
-          int NumChunks = getNumChunks(T);
-          assert(NumChunks >= 2);
-          ICmpInst::Predicate StrictPred = Pred;
-          ICmpInst::Predicate UnsignedPred = Pred;
-          switch (Pred) {
-            case ICmpInst::ICMP_ULE: StrictPred = ICmpInst::ICMP_ULT; break;
-            case ICmpInst::ICMP_UGE: StrictPred = ICmpInst::ICMP_UGT; break;
-            case ICmpInst::ICMP_SLE: StrictPred = ICmpInst::ICMP_SLT; UnsignedPred = ICmpInst::ICMP_ULE; break;
-            case ICmpInst::ICMP_SGE: StrictPred = ICmpInst::ICMP_SGT; UnsignedPred = ICmpInst::ICMP_UGE; break;
-            case ICmpInst::ICMP_SLT:                                  UnsignedPred = ICmpInst::ICMP_ULT; break;
-            case ICmpInst::ICMP_SGT:                                  UnsignedPred = ICmpInst::ICMP_UGT; break;
-            case ICmpInst::ICMP_ULT: break;
-            case ICmpInst::ICMP_UGT: break;
-            default: assert(0);
-          }
-          // general pattern is
-          // a,b,c < A,B,C    =>    c < C || (c == C && b < B) || (c == C && b == B && a < A)
-          Instruction *Final = CopyDebug(new ICmpInst(I, StrictPred, LeftChunks[NumChunks-1], RightChunks[NumChunks-1]), I);
-          for (int i = NumChunks-2; i >= 0; i--) {
-            Instruction *Curr = CopyDebug(new ICmpInst(I, UnsignedPred, LeftChunks[i], RightChunks[i]), I);
-            for (int j = NumChunks-1; j > i; j--) {
-              Instruction *Temp = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_EQ, LeftChunks[j], RightChunks[j]), I);
-              Curr = CopyDebug(BinaryOperator::Create(Instruction::And, Temp, Curr, "", I), I);
-            }
-            Final = CopyDebug(BinaryOperator::Create(Instruction::Or, Final, Curr, "", I), I);
-          }
-          I->replaceAllUsesWith(Final);
-          break;
-        }
-        default: assert(0);
-      }
-      break;
-    }
-    case Instruction::Select: {
-      SelectInst *SI = cast<SelectInst>(I);
-      Value *Cond = SI->getCondition();
-      ChunksVec TrueChunks = getChunks(SI->getTrueValue());
-      ChunksVec FalseChunks = getChunks(SI->getFalseValue());
-      unsigned Num = getNumChunks(I->getType());
-      for (unsigned i = 0; i < Num; i++) {
-        Instruction *Part = CopyDebug(SelectInst::Create(Cond, TrueChunks[i], FalseChunks[i], "", I), I);
-        Chunks.push_back(Part);
-      }
-      break;
-    }
-    case Instruction::PHI: {
-      PHINode *Parent = cast<PHINode>(I);
-      int Num = getNumChunks(I->getType());
-      int PhiNum = Parent->getNumIncomingValues();
-      for (int i = 0; i < Num; i++) {
-        Instruction *P = CopyDebug(PHINode::Create(i32, PhiNum, "", I), I);
-        Chunks.push_back(P);
-      }
-      // PHI node operands may not be translated yet; we'll handle them at the end.
-      Phis.push_back(Parent);
-      break;
-    }
-    case Instruction::And:
-    case Instruction::Or:
-    case Instruction::Xor: {
-      BinaryOperator *BO = cast<BinaryOperator>(I);
-      ChunksVec LeftChunks = getChunks(BO->getOperand(0));
-      ChunksVec RightChunks = getChunks(BO->getOperand(1));
-      int Num = getNumChunks(BO->getType());
-      for (int i = 0; i < Num; i++) {
-        // If there's a constant operand, it's likely enough that one of the
-        // chunks will be a trivial operation, so it's worth calling
-        // SimplifyBinOp here.
-        if (Value *V = SimplifyBinOp(BO->getOpcode(), LeftChunks[i], RightChunks[i], DL)) {
-          Chunks.push_back(V);
-        } else {
-          Chunks.push_back(CopyDebug(BinaryOperator::Create(BO->getOpcode(), LeftChunks[i], RightChunks[i], "", BO), BO));
-        }
-      }
-      break;
-    }
-    case Instruction::Call: {
-      CallInst *CI = cast<CallInst>(I);
-      Function *F = CI->getCalledFunction();
-      if (F) {
-        assert(okToRemainIllegal(F));
-        return false;
-      }
-      Value *CV = CI->getCalledValue();
-      FunctionType *OFT = NULL;
-      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
-        assert(CE);
-        assert(CE->getOpcode() == Instruction::BitCast);
-        OFT = cast<FunctionType>(cast<PointerType>(CE->getType())->getElementType());
-        Constant *C = CE->getOperand(0);
-        CV = ConstantExpr::getBitCast(C, getLegalizedFunctionType(OFT)->getPointerTo());
-      } else {
-        // this is a function pointer call
-        OFT = cast<FunctionType>(cast<PointerType>(CV->getType())->getElementType());
-        // we need to add a bitcast
-        CV = new BitCastInst(CV, getLegalizedFunctionType(OFT)->getPointerTo(), "", I);
-      }
-      // create a call with space for legal args
-      SmallVector<Value *, 0> Args; // XXX
-      int Num = OFT->getNumParams();
-      for (int i = 0; i < Num; i++) {
-        Type *T = OFT->getParamType(i);
-        if (!isIllegal(T)) {
-          Args.push_back(CI->getArgOperand(i));
-        } else {
-          assert(T == i64);
-          ChunksVec ArgChunks = getChunks(CI->getArgOperand(i));
-          Args.push_back(ArgChunks[0]);
-          Args.push_back(ArgChunks[1]);
-        }
-      }
-      Instruction *L = CopyDebug(CallInst::Create(CV, Args, "", I), I);
-      Instruction *H = NULL;
-      // legalize return value as well, if necessary
-      if (isIllegal(I->getType())) {
-        assert(I->getType() == i64);
-        ensureFuncs();
-        H = CopyDebug(CallInst::Create(GetHigh, "", I), I);
-        Chunks.push_back(L);
-        Chunks.push_back(H);
-      } else {
-        I->replaceAllUsesWith(L);
-      }
-      break;
-    }
-    case Instruction::FPToUI:
-    case Instruction::FPToSI: {
-      assert(I->getType() == i64);
-      ensureFuncs();
-      SmallVector<Value *, 1> Args;
-      Value *Input = I->getOperand(0);
-      Args.push_back(Input);
-      Instruction *L, *H;
-      if (Input->getType()->isFloatTy()) {
-        L = CopyDebug(CallInst::Create(FtoILow, Args, "", I), I);
-        H = CopyDebug(CallInst::Create(FtoIHigh, Args, "", I), I);
-      } else {
-        L = CopyDebug(CallInst::Create(DtoILow, Args, "", I), I);
-        H = CopyDebug(CallInst::Create(DtoIHigh, Args, "", I), I);
-      }
-      Chunks.push_back(L);
-      Chunks.push_back(H);
-      break;
-    }
-    case Instruction::BitCast: {
-      if (I->getType() == Type::getDoubleTy(TheModule->getContext())) {
-        // fall through to itofp
-      } else if (I->getOperand(0)->getType() == Type::getDoubleTy(TheModule->getContext())) {
-        // double to i64
-        assert(I->getType() == i64);
-        ensureFuncs();
-        SmallVector<Value *, 1> Args;
-        Args.push_back(I->getOperand(0));
-        Instruction *L = CopyDebug(CallInst::Create(BDtoILow, Args, "", I), I);
-        Instruction *H = CopyDebug(CallInst::Create(BDtoIHigh, Args, "", I), I);
-        Chunks.push_back(L);
-        Chunks.push_back(H);
-        break;
-      } else if (isa<VectorType>(I->getOperand(0)->getType()) && !isa<VectorType>(I->getType())) {
-          unsigned NumElts = getNumChunks(I->getType());
-          VectorType *IVTy = VectorType::get(i32, NumElts);
-          Instruction *B = CopyDebug(new BitCastInst(I->getOperand(0), IVTy, "", I), I);
-          for (unsigned i = 0; i < NumElts; ++i) {
-              Constant *Idx = ConstantInt::get(i32, i);
-              Instruction *Ext = CopyDebug(ExtractElementInst::Create(B, Idx, "", I), I);
-              Chunks.push_back(Ext);
-          }
-          break;
-      } else {
-        // no-op bitcast
-        assert(I->getType() == I->getOperand(0)->getType());
-        Chunks = getChunks(I->getOperand(0));
-        break;
-      }
-    }
-    case Instruction::SIToFP:
-    case Instruction::UIToFP: {
-      assert(I->getOperand(0)->getType() == i64);
-      ensureFuncs();
-      ChunksVec InputChunks = getChunks(I->getOperand(0));
-      Function *F;
-      switch (I->getOpcode()) {
-        case Instruction::SIToFP: F = I->getType() == Type::getDoubleTy(TheModule->getContext()) ? SItoD : SItoF; break;
-        case Instruction::UIToFP: F = I->getType() == Type::getDoubleTy(TheModule->getContext()) ? UItoD : UItoF; break;
-        case Instruction::BitCast: {
-          assert(I->getType() == Type::getDoubleTy(TheModule->getContext()));
-          F = BItoD;
-          break;
-        }
-        default: assert(0);
-      }
-      Instruction *D = CopyDebug(CallInst::Create(F, InputChunks, "", I), I);
-      I->replaceAllUsesWith(D);
-      break;
-    }
-    case Instruction::Switch: {
-      assert(I->getOperand(0)->getType() == i64);
-      ChunksVec InputChunks = getChunks(I->getOperand(0));
-
-      // do a switch on the lower 32 bits, into a different basic block for each target, then do a branch in each of those on the high 32 bits
-      SwitchInst* SI = cast<SwitchInst>(I);
-      BasicBlock *DD = SI->getDefaultDest();
-      BasicBlock *SwitchBB = I->getParent();
-      Function *F = SwitchBB->getParent();
-
-      unsigned NumItems = SI->getNumCases();
-      SwitchInst *LowSI = SwitchInst::Create(InputChunks[0], DD, NumItems, I); // same default destination: if lower bits do not match, go straight to default
-      CopyDebug(LowSI, I);
-
-      typedef std::pair<uint32_t, BasicBlock*> Pair;
-      typedef std::vector<Pair> Vec; // vector of pairs of high 32 bits, basic block
-      typedef std::map<uint32_t, Vec> Map; // maps low 32 bits to their Vec info
-      Map Groups;                          // (as two 64-bit values in the switch may share their lower bits)
-
-      for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) {
-        BasicBlock *BB = i.getCaseSuccessor();
-        uint64_t Bits = i.getCaseValue()->getZExtValue();
-        uint32_t LowBits = (uint32_t)Bits;
-        uint32_t HighBits = (uint32_t)(Bits >> 32);
-        Vec& V = Groups[LowBits];
-        V.push_back(Pair(HighBits, BB));
-      }
-
-      unsigned Counter = 0;
-      BasicBlock *InsertPoint = SwitchBB;
-
-      for (Map::iterator GI = Groups.begin(); GI != Groups.end(); GI++) {
-        uint32_t LowBits = GI->first;
-        Vec &V = GI->second;
-
-        BasicBlock *NewBB = BasicBlock::Create(F->getContext(), "switch64_" + utostr(Counter++), F);
-        NewBB->moveAfter(InsertPoint);
-        InsertPoint = NewBB;
-        LowSI->addCase(cast<ConstantInt>(ConstantInt::get(i32, LowBits)), NewBB);
-
-        /*if (V.size() == 1) {
-          // just one option, create a branch
-          Instruction *CheckHigh = CopyDebug(new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, InputChunks[1], ConstantInt::get(i32, V[0]->first)), I);
-          Split.ToFix.push_back(CheckHigh);
-          CopyDebug(BranchInst::Create(V[0]->second, DD, CheckHigh, NewBB), I);
-        } else {*/
-
-        // multiple options, create a switch - we could also optimize and make an icmp/branch if just one, as in commented code above
-        SwitchInst *HighSI = SwitchInst::Create(InputChunks[1], DD, V.size(), NewBB); // same default destination: if lower bits do not match, go straight to default
-        for (unsigned i = 0; i < V.size(); i++) {
-          BasicBlock *BB = V[i].second;
-          HighSI->addCase(cast<ConstantInt>(ConstantInt::get(i32, V[i].first)), BB);
-          // fix phis, we used to go SwitchBB->BB, but now go SwitchBB->NewBB->BB, so we look like we arrived from NewBB. Replace the phi from the
-          // now unneeded SwitchBB to the new BB
-          // We cannot do this here right now, as phis we encounter may be in the middle of processing (empty), so we queue these.
-          for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
-            PHINode *Phi = dyn_cast<PHINode>(I);
-            if (!Phi) break;
-            PhiBlockChange Change;
-            Change.DD = BB;
-            Change.SwitchBB = SwitchBB;
-            Change.NewBB = NewBB;
-            PhiBlockChanges.push_back(Change);
-            break; // we saw a phi on this BB, and pushed a Change
-          }
-        }
-
-        // We used to go SwitchBB->DD, but now go SwitchBB->NewBB->DD, fix that like with BB above. However here we do not replace,
-        // as the switch BB is still possible to arrive from - we can arrive at the default if either the lower bits were wrong (we
-        // arrive from the switchBB) or from the NewBB if the high bits were wrong.
-        PhiBlockChange Change;
-        Change.DD = DD;
-        Change.SwitchBB = SwitchBB;
-        Change.NewBB = NewBB;
-        PhiBlockChanges.push_back(Change);
-      }
-      break;
-    }
-    default: {
-      I->dump();
-      assert(0 && "some i64 thing we can't legalize yet");
-    }
-  }
-
-  return true;
-}
-
-ChunksVec ExpandI64::getChunks(Value *V, bool AllowUnreachable) {
-  assert(isIllegal(V->getType()));
-
-  unsigned Num = getNumChunks(V->getType());
-  Type *i32 = Type::getInt32Ty(V->getContext());
-
-  if (isa<UndefValue>(V))
-    return ChunksVec(Num, UndefValue::get(i32));
-
-  if (Constant *C = dyn_cast<Constant>(V)) {
-    ChunksVec Chunks;
-    for (unsigned i = 0; i < Num; i++) {
-      Constant *Count = ConstantInt::get(C->getType(), i * 32);
-      Constant *NewC = ConstantExpr::getTrunc(ConstantExpr::getLShr(C, Count), i32);
-      TargetLibraryInfo *TLI = 0; // TODO
-      if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
-        if (Constant *FoldedC = ConstantFoldConstantExpression(NewCE, DL, TLI)) {
-          NewC = FoldedC;
-        }
-      }
-
-      Chunks.push_back(NewC);
-    }
-    return Chunks;
-  }
-
-  if (Splits.find(V) == Splits.end()) {
-    if (AllowUnreachable)
-      return ChunksVec(Num, UndefValue::get(i32));
-    errs() << *V << "\n";
-    report_fatal_error("could not find chunks for illegal value");
-  }
-  assert(Splits[V].size() == Num);
-  return Splits[V];
-}
-
-void ExpandI64::ensureFuncs() {
-  if (Add != NULL) return;
-
-  Type *i32 = Type::getInt32Ty(TheModule->getContext());
-
-  SmallVector<Type*, 4> FourArgTypes;
-  FourArgTypes.push_back(i32);
-  FourArgTypes.push_back(i32);
-  FourArgTypes.push_back(i32);
-  FourArgTypes.push_back(i32);
-  FunctionType *FourFunc = FunctionType::get(i32, FourArgTypes, false);
-
-  Add = Function::Create(FourFunc, GlobalValue::ExternalLinkage,
-                         "i64Add", TheModule);
-  Sub = Function::Create(FourFunc, GlobalValue::ExternalLinkage,
-                         "i64Subtract", TheModule);
-  Mul = Function::Create(FourFunc, GlobalValue::ExternalLinkage,
-                         "__muldi3", TheModule);
-  SDiv = Function::Create(FourFunc, GlobalValue::ExternalLinkage,
-                         "__divdi3", TheModule);
-  UDiv = Function::Create(FourFunc, GlobalValue::ExternalLinkage,
-                         "__udivdi3", TheModule);
-  SRem = Function::Create(FourFunc, GlobalValue::ExternalLinkage,
-                         "__remdi3", TheModule);
-  URem = Function::Create(FourFunc, GlobalValue::ExternalLinkage,
-                         "__uremdi3", TheModule);
-  LShr = Function::Create(FourFunc, GlobalValue::ExternalLinkage,
-                          "bitshift64Lshr", TheModule);
-  AShr = Function::Create(FourFunc, GlobalValue::ExternalLinkage,
-                          "bitshift64Ashr", TheModule);
-  Shl = Function::Create(FourFunc, GlobalValue::ExternalLinkage,
-                          "bitshift64Shl", TheModule);
-
-  if (!(GetHigh = TheModule->getFunction("getHigh32"))) {
-    SmallVector<Type*, 0> GetHighArgTypes;
-    FunctionType *GetHighFunc = FunctionType::get(i32, GetHighArgTypes, false);
-    GetHigh = Function::Create(GetHighFunc, GlobalValue::ExternalLinkage,
-                               "getHigh32", TheModule);
-  }
-
-  Type *V = Type::getVoidTy(TheModule->getContext());
-
-  SmallVector<Type*, 1> SetHighArgTypes;
-  SetHighArgTypes.push_back(i32);
-  FunctionType *SetHighFunc = FunctionType::get(V, SetHighArgTypes, false);
-  SetHigh = Function::Create(SetHighFunc, GlobalValue::ExternalLinkage,
-                             "setHigh32", TheModule);
-
-  Type *Double = Type::getDoubleTy(TheModule->getContext());
-  Type *Float  = Type::getFloatTy(TheModule->getContext());
-
-  SmallVector<Type*, 1> FtoITypes;
-  FtoITypes.push_back(Float);
-  FunctionType *FtoIFunc = FunctionType::get(i32, FtoITypes, false);
-
-  SmallVector<Type*, 1> DtoITypes;
-  DtoITypes.push_back(Double);
-  FunctionType *DtoIFunc = FunctionType::get(i32, DtoITypes, false);
-
-  FtoILow = Function::Create(FtoIFunc, GlobalValue::ExternalLinkage,
-                             "FtoILow", TheModule);
-  FtoIHigh = Function::Create(FtoIFunc, GlobalValue::ExternalLinkage,
-                              "FtoIHigh", TheModule);
-  DtoILow = Function::Create(DtoIFunc, GlobalValue::ExternalLinkage,
-                             "DtoILow", TheModule);
-  DtoIHigh = Function::Create(DtoIFunc, GlobalValue::ExternalLinkage,
-                              "DtoIHigh", TheModule);
-  BDtoILow = Function::Create(DtoIFunc, GlobalValue::ExternalLinkage,
-                              "BDtoILow", TheModule);
-  BDtoIHigh = Function::Create(DtoIFunc, GlobalValue::ExternalLinkage,
-                               "BDtoIHigh", TheModule);
-
-  SmallVector<Type*, 2> ItoTypes;
-  ItoTypes.push_back(i32);
-  ItoTypes.push_back(i32);
-
-  FunctionType *ItoFFunc = FunctionType::get(Float, ItoTypes, false);
-  SItoF = Function::Create(ItoFFunc, GlobalValue::ExternalLinkage,
-                           "SItoF", TheModule);
-  UItoF = Function::Create(ItoFFunc, GlobalValue::ExternalLinkage,
-                           "UItoF", TheModule);
-
-  FunctionType *ItoDFunc = FunctionType::get(Double, ItoTypes, false);
-  SItoD = Function::Create(ItoDFunc, GlobalValue::ExternalLinkage,
-                           "SItoD", TheModule);
-  UItoD = Function::Create(ItoDFunc, GlobalValue::ExternalLinkage,
-                           "UItoD", TheModule);
-
-  BItoD = Function::Create(ItoDFunc, GlobalValue::ExternalLinkage,
-                           "BItoD", TheModule);
-}
-
-bool ExpandI64::runOnModule(Module &M) {
-  TheModule = &M;
-  DL = &getAnalysis<DataLayoutPass>().getDataLayout();
-  Splits.clear();
-  Changed = false;
-
-  // pre pass - legalize functions
-  for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) {
-    Function *Func = Iter++;
-    ensureLegalFunc(Func);
-  }
-
-  // first pass - split
-  DeadVec Dead;
-  for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ++Iter) {
-    Function *Func = Iter;
-    if (Func->isDeclaration()) {
-      continue;
-    }
-
-    // Walk the body of the function. We use reverse postorder so that we visit
-    // all operands of an instruction before the instruction itself. The
-    // exception to this is PHI nodes, which we put on a list and handle below.
-    ReversePostOrderTraversal<Function*> RPOT(Func);
-    for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
-         RE = RPOT.end(); RI != RE; ++RI) {
-      BasicBlock *BB = *RI;
-      for (BasicBlock::iterator Iter = BB->begin(), E = BB->end();
-           Iter != E; ) {
-        Instruction *I = Iter++;
-        if (!isLegalInstruction(I)) {
-          if (splitInst(I)) {
-            Changed = true;
-            Dead.push_back(I);
-          }
-        }
-      }
-    }
-
-    // Fix up PHI node operands.
-    while (!Phis.empty()) {
-      PHINode *PN = Phis.pop_back_val();
-      ChunksVec OutputChunks = getChunks(PN);
-      for (unsigned j = 0, je = PN->getNumIncomingValues(); j != je; ++j) {
-        Value *Op = PN->getIncomingValue(j);
-        ChunksVec InputChunks = getChunks(Op, true);
-        for (unsigned k = 0, ke = OutputChunks.size(); k != ke; ++k) {
-          PHINode *NewPN = cast<PHINode>(OutputChunks[k]);
-          NewPN->addIncoming(InputChunks[k], PN->getIncomingBlock(j));
-        }
-      }
-      PN->dropAllReferences();
-    }
-
-    // Delete instructions which were replaced. We do this after the full walk
-    // of the instructions so that all uses are replaced first.
-    while (!Dead.empty()) {
-      Instruction *D = Dead.pop_back_val();
-      D->eraseFromParent();
-    }
-
-    // Apply basic block changes to phis, now that phis are all processed (and illegal phis erased)
-    for (unsigned i = 0; i < PhiBlockChanges.size(); i++) {
-      PhiBlockChange &Change = PhiBlockChanges[i];
-      for (BasicBlock::iterator I = Change.DD->begin(); I != Change.DD->end(); ++I) {
-        PHINode *Phi = dyn_cast<PHINode>(I);
-        if (!Phi) break;
-        int Index = Phi->getBasicBlockIndex(Change.SwitchBB);
-        assert(Index >= 0);
-        Phi->addIncoming(Phi->getIncomingValue(Index), Change.NewBB);
-      }
-    }
-    PhiBlockChanges.clear();
-
-    // We only visited blocks found by a DFS walk from the entry, so we haven't
-    // visited any unreachable blocks, and they may still contain illegal
-    // instructions at this point. Being unreachable, they can simply be deleted.
-    removeUnreachableBlocks(*Func);
-  }
-
-  // post pass - clean up illegal functions that were legalized. We do this
-  // after the full walk of the functions so that all uses are replaced first.
-  for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) {
-    Function *Func = Iter++;
-    removeIllegalFunc(Func);
-  }
-
-  return Changed;
-}
-
-void ExpandI64::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<DataLayoutPass>();
-  ModulePass::getAnalysisUsage(AU);
-}
-
-namespace llvm {
-
-ModulePass *createExpandI64Pass() {
-  return new ExpandI64();
-}
-
-}
diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index 51bc2e45680..1fade5ea929 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -99,6 +99,11 @@ GlobalBase("emscripten-global-base",
            cl::desc("Where global variables start out in memory (see emscripten GLOBAL_BASE option)"),
            cl::init(8));
 
+static cl::opt<bool>
+Relocatable("emscripten-relocatable",
+            cl::desc("Whether to emit relocatable code (see emscripten RELOCATABLE option)"),
+            cl::init(false));
+
 
 extern "C" void LLVMInitializeJSBackendTarget() {
   // Register the target.
@@ -350,11 +355,23 @@ namespace {
       return V;
     }
 
+    std::string relocateFunctionPointer(std::string FP) { // FP: JS text of a function pointer value; callers pass utostr(getFunctionIndex(F))
+      return Relocatable ? "(fb + (" + FP + ") | 0)" : FP; // relocatable mode: emit FP offset by `fb` (presumably the function-table base -- confirm), coerced with |0; otherwise FP unchanged
+    }
+
+    std::string relocateGlobal(std::string G) { // G: JS text of a global's address; callers pass utostr(AbsoluteTarget) or utostr(getGlobalAddress(...))
+      return Relocatable ? "(gb + (" + G + ") | 0)" : G; // relocatable mode: emit G offset by `gb` (presumably the globals base -- confirm), coerced with |0; otherwise G unchanged
+    }
+
     // Return a constant we are about to write into a global as a numeric offset. If the
     // value is not known at compile time, emit a postSet to that location.
     unsigned getConstAsOffset(const Value *V, unsigned AbsoluteTarget) {
       V = resolveFully(V);
       if (const Function *F = dyn_cast<const Function>(V)) {
+        if (Relocatable) {
+          PostSets += "\n HEAP32[" + relocateGlobal(utostr(AbsoluteTarget)) + " >> 2] = " + relocateFunctionPointer(utostr(getFunctionIndex(F))) + ';';
+          return 0; // emit zero in there for now, until the postSet
+        }
         return getFunctionIndex(F);
       } else if (const BlockAddress *BA = dyn_cast<const BlockAddress>(V)) {
         return getBlockAddress(BA);
@@ -365,10 +382,17 @@ namespace {
             // All postsets are of external values, so they are pointers, hence 32-bit
             std::string Name = getOpName(V);
             Externals.insert(Name);
-            PostSets += "HEAP32[" + utostr(AbsoluteTarget>>2) + "] = " + Name + ';';
+            PostSets += "\n HEAP32[" + relocateGlobal(utostr(AbsoluteTarget)) + " >> 2] = " + Name + ';';
+            return 0; // emit zero in there for now, until the postSet
+          } else if (Relocatable) {
+            // this is one of our globals, but we must relocate it. we return zero, but the caller may store
+            // an added offset, which we read at postSet time; in other words, we just add to that offset
+            std::string access = "HEAP32[" + relocateGlobal(utostr(AbsoluteTarget)) + " >> 2]";
+            PostSets += "\n " + access + " = (" + access + " | 0) + " + relocateGlobal(utostr(getGlobalAddress(V->getName().str()))) + ';';
             return 0; // emit zero in there for now, until the postSet
           }
         }
+        assert(!Relocatable);
         return getGlobalAddress(V->getName().str());
       }
     }
@@ -394,8 +418,13 @@ namespace {
         // replace double quotes with escaped single quotes
         curr = 0;
         while ((curr = code.find('"', curr)) != std::string::npos) {
-          code = code.replace(curr, 1, "\\" "\"");
-          curr += 2; // skip this one
+          if (curr == 0 || code[curr-1] != '\\') {
+            code = code.replace(curr, 1, "\\" "\"");
+            curr += 2; // skip this one
+          } else { // already escaped, escape the slash as well
+            code = code.replace(curr, 1, "\\" "\\" "\"");
+            curr += 3; // skip this one
+          }
         }
       }
       if (AsmConsts.count(code) > 0) return AsmConsts[code];
@@ -1031,8 +1060,10 @@ std::string JSWriter::getPtrUse(const Value* Ptr) {
   Type *t = cast<PointerType>(Ptr->getType())->getElementType();
   unsigned Bytes = DL->getTypeAllocSize(t);
   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
-    std::string text = "";
     unsigned Addr = getGlobalAddress(GV->getName().str());
+    if (Relocatable) {
+      return getHeapAccess(relocateGlobal(utostr(Addr)), Bytes, t->isIntegerTy() || t->isPointerTy());
+    }
     switch (Bytes) {
     default: llvm_unreachable("Unsupported type");
     case 8: return "HEAPF64[" + utostr(Addr >> 3) + "]";
@@ -1047,16 +1078,15 @@ std::string JSWriter::getPtrUse(const Value* Ptr) {
     case 2: return "HEAP16[" + utostr(Addr >> 1) + "]";
     case 1: return "HEAP8[" + utostr(Addr) + "]";
     }
-  } else {
-    return getHeapAccess(getValueAsStr(Ptr), Bytes, t->isIntegerTy() || t->isPointerTy());
   }
+  return getHeapAccess(getValueAsStr(Ptr), Bytes, t->isIntegerTy() || t->isPointerTy());
 }
 
 std::string JSWriter::getConstant(const Constant* CV, AsmCast sign) {
   if (isa<ConstantPointerNull>(CV)) return "0";
 
   if (const Function *F = dyn_cast<Function>(CV)) {
-    return utostr(getFunctionIndex(F));
+    return relocateFunctionPointer(utostr(getFunctionIndex(F)));
   }
 
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
@@ -1070,7 +1100,7 @@ std::string JSWriter::getConstant(const Constant* CV, AsmCast sign) {
       // to worry about weak or other kinds of aliases.
       return getConstant(GA->getAliasee(), sign);
     }
-    return utostr(getGlobalAddress(GV->getName().str()));
+    return relocateGlobal(utostr(getGlobalAddress(GV->getName().str())));
   }
 
   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
@@ -2451,6 +2481,12 @@ void JSWriter::printFunctionBody(const Function *F) {
       Out << " return " << getParenCast(getConstant(UndefValue::get(RT)), RT, ASM_NONSPECIFIC) << ";\n";
     }
   }
+
+  if (Relocatable) {
+    if (!F->hasInternalLinkage()) {
+      Exports.push_back(getJSName(F));
+    }
+  }
 }
 
 void JSWriter::processConstants() {
@@ -2468,6 +2504,20 @@ void JSWriter::processConstants() {
       parseConstant(I->getName().str(), I->getInitializer(), false);
     }
   }
+  if (Relocatable) {
+    for (Module::const_global_iterator I = TheModule->global_begin(),
+           E = TheModule->global_end(); I != E; ++I) {
+      if (I->hasInitializer() && !I->hasInternalLinkage()) {
+        std::string Name = I->getName().str();
+        if (GlobalAddresses.find(Name) != GlobalAddresses.end()) {
+          std::string JSName = getJSName(I).substr(1);
+          if (Name == JSName) { // don't export things that have weird internal names, that C can't dlsym anyhow
+            NamedGlobals[Name] = getGlobalAddress(Name);
+          }
+        }
+      }
+    }
+  }
 }
 
 void JSWriter::printFunction(const Function *F) {
@@ -2522,7 +2572,7 @@ void JSWriter::printModuleBody() {
     if (!I->isDeclaration()) printFunction(I);
   }
   Out << "function runPostSets() {\n";
-  Out << " " << PostSets << "\n";
+  Out << PostSets << "\n";
   Out << "}\n";
   PostSets = "";
   Out << "// EMSCRIPTEN_END_FUNCTIONS\n\n";
@@ -2690,7 +2740,7 @@ void JSWriter::printModuleBody() {
     } else {
       Out << ", ";
     }
-    Out << "\"_" << I->first << "\": \"" << utostr(I->second) << "\"";
+    Out << "\"" << I->first << "\": \"" << utostr(I->second) << "\"";
   }
   Out << "},";
 
@@ -2794,7 +2844,7 @@ void JSWriter::parseConstant(const std::string& name, const Constant* CV, bool c
             if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
               C = CE->getOperand(0); // ignore bitcasts
             }
-            Exports.push_back(getJSName(C));
+            if (isa<Function>(C)) Exports.push_back(getJSName(C));
           }
         } else if ((*UI)->getName() == "llvm.global.annotations") {
           // llvm.global.annotations can be ignored.
@@ -2990,6 +3040,10 @@ bool JSWriter::runOnModule(Module &M) {
   TheModule = &M;
   DL = &getAnalysis<DataLayoutPass>().getDataLayout();
 
+  // Validate option combinations. These flags come from the command line, so check them in every build: assert() is compiled out under NDEBUG.
+  if (Relocatable && GlobalBase != 0) report_fatal_error("emscripten-relocatable requires emscripten-global-base to be 0");
+  if (Relocatable && !EmulatedFunctionPointers) report_fatal_error("emscripten-relocatable requires emulated function pointers");
+
   setupCallHandlers();
 
   printProgram("", "");
diff --git a/lib/Target/JSBackend/Relooper.cpp b/lib/Target/JSBackend/Relooper.cpp
index 14a5882a67a..d410a42cb37 100644
--- a/lib/Target/JSBackend/Relooper.cpp
+++ b/lib/Target/JSBackend/Relooper.cpp
@@ -129,8 +129,8 @@ Branch::Branch(const char *ConditionInit, const char *CodeInit) : Ancestor(NULL)
 }
 
 Branch::~Branch() {
-  if (Condition) free((void*)Condition);
-  if (Code) free((void*)Code);
+  free(static_cast<void *>(const_cast<char *>(Condition)));
+  free(static_cast<void *>(const_cast<char *>(Code)));
 }
 
 void Branch::Render(Block *Target, bool SetLabel) {
@@ -155,8 +155,8 @@ Block::Block(const char *CodeInit, const char *BranchVarInit) : Parent(NULL), Id
 }
 
 Block::~Block() {
-  if (Code) free((void*)Code);
-  if (BranchVar) free((void*)BranchVar);
+  free(static_cast<void *>(const_cast<char *>(Code)));
+  free(static_cast<void *>(const_cast<char *>(BranchVar)));
   for (BlockBranchMap::iterator iter = ProcessedBranchesOut.begin(); iter != ProcessedBranchesOut.end(); iter++) {
     delete iter->second;
   }
@@ -215,7 +215,7 @@ void Block::Render(bool InLoop) {
   // into the Simple's branches.
   MultipleShape *Fused = Shape::IsMultiple(Parent->Next);
   if (Fused) {
-    PrintDebug("Fusing Multiple to Simple\n");
+    PrintDebug("Fusing Multiple to Simple\n", 0);
     Parent->Next = Parent->Next->Next;
     Fused->UseSwitch = false; // TODO: emit switches here
     Fused->RenderLoopPrefix();
@@ -710,7 +710,7 @@ void Relooper::Calculate(Block *Entry) {
       }
 #endif
 
-      PrintDebug("creating loop block:\n");
+      PrintDebug("creating loop block:\n", 0);
       DebugDump(InnerBlocks, "  inner blocks:");
       DebugDump(Entries, "  inner entries:");
       DebugDump(Blocks, "  outer blocks:");
@@ -912,7 +912,7 @@ void Relooper::Calculate(Block *Entry) {
     //   ->Next block on them, and the blocks are what remains in Blocks (which Make* modify). In this way
     //   we avoid recursing on Next (imagine a long chain of Simples, if we recursed we could blow the stack).
     Shape *Process(BlockSet &Blocks, BlockSet& InitialEntries, Shape *Prev) {
-      PrintDebug("Process() called\n");
+      PrintDebug("Process() called\n", 0);
       BlockSet *Entries = &InitialEntries;
       BlockSet TempEntries[2];
       int CurrTempIndex = 0;
@@ -922,12 +922,12 @@ void Relooper::Calculate(Block *Entry) {
         Shape *Temp = call; \
         if (Prev) Prev->Next = Temp; \
         if (!Ret) Ret = Temp; \
-        if (!NextEntries->size()) { PrintDebug("Process() returning\n"); return Ret; } \
+        if (!NextEntries->size()) { PrintDebug("Process() returning\n", 0); return Ret; } \
         Prev = Temp; \
         Entries = NextEntries; \
         continue;
       while (1) {
-        PrintDebug("Process() running\n");
+        PrintDebug("Process() running\n", 0);
         DebugDump(Blocks, "  blocks : ");
         DebugDump(*Entries, "  entries: ");
 
@@ -1013,7 +1013,7 @@ void Relooper::Calculate(Block *Entry) {
                 if (!DeadEnd) break;
               }
               if (DeadEnd) {
-                PrintDebug("Removing nesting by not handling large group because small group is dead end\n");
+                PrintDebug("Removing nesting by not handling large group because small group is dead end\n", 0);
                 IndependentGroups.erase(LargeEntry);
               }
             }
@@ -1052,7 +1052,7 @@ void Relooper::Calculate(Block *Entry) {
 
   struct PostOptimizer {
     Relooper *Parent;
-    void *Closure;
+    std::stack<Shape*> *Closure;
 
     PostOptimizer(Relooper *ParentInit) : Parent(ParentInit), Closure(NULL) {}
 
@@ -1066,10 +1066,13 @@ void Relooper::Calculate(Block *Entry) {
 
     #define SHAPE_SWITCH(var, simple, multiple, loop) \
       if (SimpleShape *Simple = Shape::IsSimple(var)) { \
+        (void)Simple; \
         simple; \
       } else if (MultipleShape *Multiple = Shape::IsMultiple(var)) { \
+        (void)Multiple; \
         multiple; \
       } else if (LoopShape *Loop = Shape::IsLoop(var)) { \
+        (void)Loop; \
         loop; \
       }
 
@@ -1142,7 +1145,6 @@ void Relooper::Calculate(Block *Entry) {
               }
               if (Found && !Abort) {
                 for (BlockBranchMap::iterator iter = Simple->Inner->ProcessedBranchesOut.begin(); iter != Simple->Inner->ProcessedBranchesOut.end(); iter++) {
-                  Block *Target = iter->first;
                   Branch *Details = iter->second;
                   if (Details->Type == Branch::Break) {
                     Details->Type = Branch::Direct;
@@ -1194,9 +1196,9 @@ void Relooper::Calculate(Block *Entry) {
     void FindLabeledLoops(Shape *Root) {
       bool First = Closure == NULL;
       if (First) {
-        Closure = (void*)(new std::stack<Shape*>);
+        Closure = new std::stack<Shape*>;
       }
-      std::stack<Shape*> &LoopStack = *((std::stack<Shape*>*)Closure);
+      std::stack<Shape*> &LoopStack = *Closure;
 
       Shape *Next = Root;
       while (Next) {
@@ -1219,7 +1221,6 @@ void Relooper::Calculate(Block *Entry) {
             RECURSE_Multiple(Fused, FindLabeledLoops);
           }
           for (BlockBranchMap::iterator iter = Simple->Inner->ProcessedBranchesOut.begin(); iter != Simple->Inner->ProcessedBranchesOut.end(); iter++) {
-            Block *Target = iter->first;
             Branch *Details = iter->second;
             if (Details->Type == Branch::Break || Details->Type == Branch::Continue) {
               assert(LoopStack.size() > 0);
@@ -1263,7 +1264,7 @@ void Relooper::Calculate(Block *Entry) {
       }
 
       if (First) {
-        delete (std::stack<Shape*>*)Closure;
+        delete Closure;
       }
     }
 
@@ -1274,7 +1275,7 @@ void Relooper::Calculate(Block *Entry) {
     }
   };
 
-  PrintDebug("=== Optimizing shapes ===\n");
+  PrintDebug("=== Optimizing shapes ===\n", 0);
 
   PostOptimizer(this).Process(Root);
 }
@@ -1435,4 +1436,3 @@ RELOOPERDLL_API void rl_relooper_render(void *relooper) {
 }
 
 }
-
diff --git a/lib/Transforms/NaCl/ExpandVarArgs.cpp b/lib/Transforms/NaCl/ExpandVarArgs.cpp
index 53e262f0167..6e2824684d8 100644
--- a/lib/Transforms/NaCl/ExpandVarArgs.cpp
+++ b/lib/Transforms/NaCl/ExpandVarArgs.cpp
@@ -243,10 +243,8 @@ static bool ExpandVarArgCall(Module *M, InstType *Call, DataLayout *DL) {
       IRB.CreateMemCpy(Ptr, Arg, DL->getTypeAllocSize(
                                      Arg->getType()->getPointerElementType()),
                        /*Align=*/1);
-    else {
-      StoreInst *S = IRB.CreateStore(Arg, Ptr);
-      S->setAlignment(4); // EMSCRIPTEN: pnacl stack is only 4-byte aligned
-    }
+    else
+      IRB.CreateStore(Arg, Ptr);
     ++Index;
   }
 
@@ -256,14 +254,8 @@ static bool ExpandVarArgCall(Module *M, InstType *Call, DataLayout *DL) {
   ArgTypes.push_back(VarArgsTy->getPointerTo());
   FunctionType *NFTy = FunctionType::get(FuncType->getReturnType(), ArgTypes,
                                          /*isVarArg=*/false);
-  /// XXX EMSCRIPTEN: Handle Constants as well as Instructions, since we
-  /// don't run the ConstantExpr lowering pass.
-  Value *CastFunc;
-  if (Constant *C = dyn_cast<Constant>(Call->getCalledValue()))
-    CastFunc = ConstantExpr::getBitCast(C, NFTy->getPointerTo());
-  else
-    CastFunc = IRB.CreateBitCast(Call->getCalledValue(),
-                                 NFTy->getPointerTo(), "vararg_func");
+  Value *CastFunc = IRB.CreateBitCast(Call->getCalledValue(),
+                                      NFTy->getPointerTo(), "vararg_func");
 
   // Create the converted function call.
   FixedArgs.push_back(Buf);