diff --git a/CMakeLists.txt b/CMakeLists.txt index 7bcc1f3bc66..ab16ccd2a43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,6 +65,9 @@ if (NOT PACKAGE_VERSION) set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}svn") endif() +# @LOCALMOD +option(LLVM_INSTALL_UTILS "Include utility binaries in the 'install' target." OFF) + option(LLVM_INSTALL_TOOLCHAIN_ONLY "Only include toolchain files in the 'install' target." OFF) option(LLVM_USE_FOLDERS "Enable solution folders in Visual Studio. Disable for Express versions." ON) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 2ef73fb8e5d..0fa32c73589 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -500,6 +500,11 @@ endmacro(add_llvm_example name) macro(add_llvm_utility name) add_llvm_executable(${name} ${ARGN}) set_target_properties(${name} PROPERTIES FOLDER "Utils") +# @LOCALMOD-BEGIN + if( LLVM_INSTALL_UTILS ) + install (TARGETS ${name} RUNTIME DESTINATION bin) + endif() +# @LOCALMOD-END endmacro(add_llvm_utility name) diff --git a/emscripten-version.txt b/emscripten-version.txt index 816b2c7aa4f..5677c30f982 100644 --- a/emscripten-version.txt +++ b/emscripten-version.txt @@ -1,2 +1,2 @@ -1.30.1 +1.30.2 diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index 511da0e4434..956d0e7416b 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -48,7 +48,6 @@ class Triple { arm, // ARM (little endian): arm, armv.*, xscale armeb, // ARM (big endian): armeb - asmjs, // asm.js: asmjs aarch64, // AArch64 (little endian): aarch64 aarch64_be, // AArch64 (big endian): aarch64_be hexagon, // Hexagon: hexagon @@ -74,6 +73,7 @@ class Triple { nvptx64, // NVPTX: 64-bit le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten) le64, // le64: generic little-endian 64-bit CPU (PNaCl / Emscripten) + asmjs, // asm.js JavaScript subset @LOCALMOD Emscripten amdil, // AMDIL amdil64, // AMDIL with 64-bit 
pointers hsail, // AMD HSAIL @@ -134,7 +134,7 @@ class Triple { Haiku, Minix, RTEMS, - Emscripten, // Emscripten + Emscripten, // Emscripten JavaScript runtime @LOCALMOD Emscripten NaCl, // Native Client CNK, // BG/P Compute-Node Kernel Bitrig, @@ -380,6 +380,30 @@ class Triple { bool isOSMSVCRT() const { return false; } bool isOSWindows() const { return false; } bool isOSNaCl() const { return true; } + bool isOSEmscripten() const { return false; } + bool isOSLinux() const { return false; } + bool isOSBinFormatELF() const { return true; } + bool isOSBinFormatCOFF() const { return false; } + bool isOSBinFormatMachO() const { return false; } +#elif defined(__EMSCRIPTEN__) + bool isMacOSX() const { return false; } + bool isiOS() const { return false; } + bool isOSDarwin() const { return false; } + bool isOSNetBSD() const { return false; } + bool isOSOpenBSD() const { return false; } + bool isOSFreeBSD() const { return false; } + bool isOSSolaris() const { return false; } + bool isOSBitrig() const { return false; } + bool isWindowsMSVCEnvironment() const { return false; } + bool isKnownWindowsMSVCEnvironment() const { return false; } + bool isWindowsItaniumEnvironment() const { return false; } + bool isWindowsCygwinEnvironment() const { return false; } + bool isWindowsGNUEnvironment() const { return false; } + bool isOSCygMing() const { return false; } + bool isOSMSVCRT() const { return false; } + bool isOSWindows() const { return false; } + bool isOSNaCl() const { return false; } + bool isOSEmscripten() const { return true; } bool isOSLinux() const { return false; } bool isOSBinFormatELF() const { return true; } bool isOSBinFormatCOFF() const { return false; } @@ -464,6 +488,13 @@ class Triple { return getOS() == Triple::NaCl; } + // @LOCALMOD-START Emscripten + /// \brief Tests whether the OS is Emscripten. + bool isOSEmscripten() const { + return getOS() == Triple::Emscripten; + } + // @LOCALMOD-END Emscripten + /// \brief Tests whether the OS is Linux. 
bool isOSLinux() const { return getOS() == Triple::Linux; diff --git a/include/llvm/Bitcode/NaCl/NaClBitcodeMunge.h b/include/llvm/Bitcode/NaCl/NaClBitcodeMunge.h index 31a5c473363..938b7ced65d 100644 --- a/include/llvm/Bitcode/NaCl/NaClBitcodeMunge.h +++ b/include/llvm/Bitcode/NaCl/NaClBitcodeMunge.h @@ -141,6 +141,8 @@ class NaClBitcodeMunger { raw_string_ostream FatalStream; // The stack of maximum abbreviation indices allowed by block enter record. SmallVector AbbrevIndexLimitStack; + // The buffer for the contents of the munged input. + SmallVector MungedInputBuffer; // Records that an error occurred, and returns stream to print error // message to. diff --git a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h index eb3a23f3cc1..6bd4abe0f2a 100644 --- a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h +++ b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h @@ -126,9 +126,8 @@ namespace llvm { /// NaClObjDump - Read PNaCl bitcode file from input, and print a /// textual representation of its contents. NoRecords and NoAssembly - /// define what should not be included in the dump. Note: The caller - /// retains ownership of the Input memory buffer. - bool NaClObjDump(MemoryBuffer *Input, raw_ostream &output, + /// define what should not be included in the dump. 
+ bool NaClObjDump(MemoryBufferRef Input, raw_ostream &output, bool NoRecords, bool NoAssembly); } // end llvm namespace diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 13816312442..87c45a82a03 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -312,7 +312,9 @@ void initializeFlattenGlobalsPass(PassRegistry&); void initializeGlobalCleanupPass(PassRegistry&); void initializeGlobalizeConstantVectorsPass(PassRegistry&); void initializeInsertDivideCheckPass(PassRegistry&); +void initializeInternalizeUsedGlobalsPass(PassRegistry&); void initializeNaClCcRewritePass(PassRegistry&); +void initializeSimplifyStructRegSignaturesPass(PassRegistry&); void initializePNaClABIVerifyFunctionsPass(PassRegistry&); void initializePNaClABIVerifyModulePass(PassRegistry&); void initializePNaClSjLjEHPass(PassRegistry&); diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h index 1387618fde4..7b22fb3b5e3 100644 --- a/include/llvm/Transforms/NaCl.h +++ b/include/llvm/Transforms/NaCl.h @@ -52,6 +52,8 @@ ModulePass *createExpandVarArgsPass(); ModulePass *createFlattenGlobalsPass(); ModulePass *createGlobalCleanupPass(); ModulePass *createGlobalizeConstantVectorsPass(); +ModulePass *createInternalizeUsedGlobalsPass(); +ModulePass *createSimplifyStructRegSignaturesPass(); ModulePass *createPNaClSjLjEHPass(); ModulePass *createReplacePtrsWithIntsPass(); ModulePass *createResolveAliasesPass(); diff --git a/lib/Bitcode/NaCl/Analysis/NaClCompress.cpp b/lib/Bitcode/NaCl/Analysis/NaClCompress.cpp index 18465eed435..d4c327f6b18 100644 --- a/lib/Bitcode/NaCl/Analysis/NaClCompress.cpp +++ b/lib/Bitcode/NaCl/Analysis/NaClCompress.cpp @@ -42,6 +42,7 @@ #include "llvm/Bitcode/NaCl/NaClCompress.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Bitcode/NaCl/AbbrevTrieNode.h" #include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" #include "llvm/Bitcode/NaCl/NaClBitstreamWriter.h" @@ -60,8 
+61,8 @@ static bool Error(const std::string &Err) { } // Prints out the abbreviation in readable form to the given Stream. -static void PrintAbbrev(raw_ostream &Stream, - unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { +static void printAbbrev(raw_ostream &Stream, unsigned BlockID, + const NaClBitCodeAbbrev *Abbrev) { Stream << "Abbrev(block " << BlockID << "): "; Abbrev->Print(Stream); } @@ -74,37 +75,36 @@ typedef std::map BitstreamToInternalAbbrevMapType; /// corresponding internal abbreviation indices. class AbbrevBitstreamToInternalMap { public: - AbbrevBitstreamToInternalMap() : NextBitstreamAbbrevIndex(0) {} /// Returns the bitstream abbreviaion index that will be associated /// with the next internal abbreviation index. - unsigned GetNextBitstreamAbbrevIndex() { + unsigned getNextBitstreamAbbrevIndex() { return NextBitstreamAbbrevIndex; } /// Changes the next bitstream abbreviation index to the given value. - void SetNextBitstreamAbbrevIndex(unsigned NextIndex) { + void setNextBitstreamAbbrevIndex(unsigned NextIndex) { NextBitstreamAbbrevIndex = NextIndex; } /// Returns true if there is an internal abbreviation index for the /// given bitstream abbreviation. - bool DefinesBitstreamAbbrevIndex(unsigned Index) { + bool definesBitstreamAbbrevIndex(unsigned Index) { return BitstreamToInternalAbbrevMap.find(Index) != BitstreamToInternalAbbrevMap.end(); } /// Returns the internal abbreviation index for the given bitstream /// abbreviation index. - unsigned GetInternalAbbrevIndex(unsigned Index) { + unsigned getInternalAbbrevIndex(unsigned Index) { return BitstreamToInternalAbbrevMap.at(Index); } /// Installs the given internal abbreviation index using the next /// available bitstream abbreviation index. 
- void InstallNewBitstreamAbbrevIndex(unsigned InternalIndex) { + void installNewBitstreamAbbrevIndex(unsigned InternalIndex) { BitstreamToInternalAbbrevMap[NextBitstreamAbbrevIndex++] = InternalIndex; } @@ -136,7 +136,7 @@ class BlockAbbrevs { Abbrevs.push_back(Abbrev); } GlobalAbbrevBitstreamToInternalMap. - SetNextBitstreamAbbrevIndex(Abbrevs.size()); + setNextBitstreamAbbrevIndex(Abbrevs.size()); } ~BlockAbbrevs() { @@ -158,9 +158,9 @@ class BlockAbbrevs { // Returns the index to the corresponding application abbreviation, // if it exists. Otherwise return NO_SUCH_ABBREVIATION; - int FindAbbreviation(const NaClBitCodeAbbrev *Abbrev) const { - for (unsigned i = GetFirstApplicationAbbreviation(); - i < GetNumberAbbreviations(); ++i) { + int findAbbreviation(const NaClBitCodeAbbrev *Abbrev) const { + for (unsigned i = getFirstApplicationAbbreviation(); + i < getNumberAbbreviations(); ++i) { if (*Abbrevs[i] == *Abbrev) return i; } return NO_SUCH_ABBREVIATION; @@ -172,8 +172,8 @@ class BlockAbbrevs { /// the given abbreviation. Returns true if new abbreviation. /// Updates Index to the index where the abbreviation is located /// in the set of abbreviations. - bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev, int &Index) { - Index = FindAbbreviation(Abbrev); + bool addAbbreviation(NaClBitCodeAbbrev *Abbrev, int &Index) { + Index = findAbbreviation(Abbrev); if (Index != NO_SUCH_ABBREVIATION) { // Already defined, don't install. Abbrev->dropRef(); @@ -190,24 +190,24 @@ class BlockAbbrevs { /// defined for the block. Guarantees that duplicate abbreviations /// are not added to the block. Note: Code takes ownership of /// the given abbreviation. Returns true if new abbreviation. - bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev) { + bool addAbbreviation(NaClBitCodeAbbrev *Abbrev) { int Index; - return AddAbbreviation(Abbrev, Index); + return addAbbreviation(Abbrev, Index); } /// The block ID associated with the block. 
- unsigned GetBlockID() const { + unsigned getBlockID() const { return BlockID; } /// Returns the index of the frist application abbreviation for the /// block. - unsigned GetFirstApplicationAbbreviation() const { + unsigned getFirstApplicationAbbreviation() const { return naclbitc::FIRST_APPLICATION_ABBREV; } // Returns the number of abbreviations associated with the block. - unsigned GetNumberAbbreviations() const { + unsigned getNumberAbbreviations() const { return Abbrevs.size(); } @@ -218,38 +218,38 @@ class BlockAbbrevs { /// Returns the abbreviation associated with the given abbreviation /// index. - NaClBitCodeAbbrev *GetIndexedAbbrev(unsigned index) { + NaClBitCodeAbbrev *getIndexedAbbrev(unsigned index) { if (index >= Abbrevs.size()) return 0; return Abbrevs[index]; } // Builds the corresponding fast lookup map for finding abbreviations // that applies to abbreviations in the block - void BuildAbbrevLookupSizeMap( + void buildAbbrevLookupSizeMap( const NaClBitcodeCompressor::CompressFlags &Flags) { - NaClBuildAbbrevLookupMap(GetLookupMap(), - GetAbbrevs(), - GetFirstApplicationAbbreviation()); - if (Flags.ShowAbbrevLookupTries) PrintLookupMap(errs()); + NaClBuildAbbrevLookupMap(getLookupMap(), + getAbbrevs(), + getFirstApplicationAbbreviation()); + if (Flags.ShowAbbrevLookupTries) printLookupMap(errs()); } - AbbrevBitstreamToInternalMap &GetGlobalAbbrevBitstreamToInternalMap() { + AbbrevBitstreamToInternalMap &getGlobalAbbrevBitstreamToInternalMap() { return GlobalAbbrevBitstreamToInternalMap; } - AbbrevLookupSizeMap &GetLookupMap() { + AbbrevLookupSizeMap &getLookupMap() { return LookupMap; } // Returns lower level vector of abbreviations. - const AbbrevVector &GetAbbrevs() const { + const AbbrevVector &getAbbrevs() const { return Abbrevs; } // Returns the abbreviation (index) to use for the corresponding // record, based on the abbreviations of this block. Note: Assumes - // that BuildAbbrevLookupSizeMap has already been called. 
- unsigned GetRecordAbbrevIndex(const NaClBitcodeRecordData &Record) { + // that buildAbbrevLookupSizeMap has already been called. + unsigned getRecordAbbrevIndex(const NaClBitcodeRecordData &Record) { unsigned BestIndex = 0; // Ignored unless found candidate. unsigned BestScore = 0; // Number of bits associated with BestIndex. bool FoundCandidate = false; @@ -269,7 +269,7 @@ class BlockAbbrevs { IterEnd = Abbreviations.end(); Iter != IterEnd; ++Iter) { uint64_t NumBits = 0; - if (CanUseAbbreviation(Values, Iter->second, NumBits)) { + if (canUseAbbreviation(Values, Iter->second, NumBits)) { if (!FoundCandidate || NumBits < BestScore) { // Use this as candidate. BestIndex = Iter->first; @@ -281,7 +281,7 @@ class BlockAbbrevs { } } } - if (FoundCandidate && BestScore <= UnabbreviatedSize(Record)) { + if (FoundCandidate && BestScore <= unabbreviatedSize(Record)) { return BestIndex; } return naclbitc::UNABBREV_RECORD; @@ -289,12 +289,12 @@ class BlockAbbrevs { // Computes the number of bits that will be generated by the // corresponding read record, if no abbreviation is used. - static uint64_t UnabbreviatedSize(const NaClBitcodeRecordData &Record) { - uint64_t NumBits = MatchVBRBits(Record.Code, DefaultVBRBits); + static uint64_t unabbreviatedSize(const NaClBitcodeRecordData &Record) { + uint64_t NumBits = matchVBRBits(Record.Code, DefaultVBRBits); size_t NumValues = Record.Values.size(); - NumBits += MatchVBRBits(NumValues, DefaultVBRBits); + NumBits += matchVBRBits(NumValues, DefaultVBRBits); for (size_t Index = 0; Index < NumValues; ++Index) { - NumBits += MatchVBRBits(Record.Values[Index], DefaultVBRBits); + NumBits += matchVBRBits(Record.Values[Index], DefaultVBRBits); } return NumBits; } @@ -303,7 +303,7 @@ class BlockAbbrevs { // record. Sets NumBits to the number of bits the abbreviation will // generate. Note: Value of NumBits is undefined if this function // return false. 
- static bool CanUseAbbreviation(NaClBitcodeValues &Values, + static bool canUseAbbreviation(NaClBitcodeValues &Values, NaClBitCodeAbbrev *Abbrev, uint64_t &NumBits) { NumBits = 0; unsigned OpIndex = 0; @@ -314,7 +314,7 @@ class BlockAbbrevs { const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(OpIndex); switch (Op.getEncoding()) { case NaClBitCodeAbbrevOp::Literal: - if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) { + if (canUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) { ++ValueIndex; ++OpIndex; continue; @@ -327,12 +327,12 @@ class BlockAbbrevs { Abbrev->getOperandInfo(OpIndex+1); // Add size of array. - NumBits += MatchVBRBits(Values.size()-ValueIndex, DefaultVBRBits); + NumBits += matchVBRBits(Values.size()-ValueIndex, DefaultVBRBits); // Add size of each field. for (; ValueIndex != ValueIndexEnd; ++ValueIndex) { uint64_t FieldBits=0; - if (!CanUseSimpleAbbrevOp(ElmtOp, Values[ValueIndex], FieldBits)) { + if (!canUseSimpleAbbrevOp(ElmtOp, Values[ValueIndex], FieldBits)) { return false; } NumBits += FieldBits; @@ -340,7 +340,7 @@ class BlockAbbrevs { return true; } default: { - if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) { + if (canUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) { ++ValueIndex; ++OpIndex; break; @@ -355,7 +355,7 @@ class BlockAbbrevs { // Returns true if the given abbreviation Op defines a single value, // and can be applied to the given Val. Adds the number of bits the // abbreviation Op will generate to NumBits if Op applies. 
- static bool CanUseSimpleAbbrevOp(const NaClBitCodeAbbrevOp &Op, + static bool canUseSimpleAbbrevOp(const NaClBitCodeAbbrevOp &Op, uint64_t Val, uint64_t &NumBits) { switch (Op.getEncoding()) { @@ -365,13 +365,13 @@ class BlockAbbrevs { return false; case NaClBitCodeAbbrevOp::Fixed: { uint64_t Width = Op.getValue(); - if (!MatchFixedBits(Val, Width)) + if (!matchFixedBits(Val, Width)) return false; NumBits += Width; return true; } case NaClBitCodeAbbrevOp::VBR: - if (unsigned Width = MatchVBRBits(Val, Op.getValue())) { + if (unsigned Width = matchVBRBits(Val, Op.getValue())) { NumBits += Width; return true; } else { @@ -387,7 +387,7 @@ class BlockAbbrevs { // Returns true if the given Val can be represented by abbreviation // operand Fixed(Width). - static bool MatchFixedBits(uint64_t Val, unsigned Width) { + static bool matchFixedBits(uint64_t Val, unsigned Width) { // Note: The reader only allows up to 32 bits for fixed values. if (Val & Mask32) return false; if (Val & ~(~0U >> (32-Width))) return false; @@ -397,7 +397,7 @@ class BlockAbbrevs { // Returns the number of bits needed to represent Val by abbreviation // operand VBR(Width). Note: Returns 0 if Val can't be represented // by VBR(Width). - static unsigned MatchVBRBits(uint64_t Val, unsigned Width) { + static unsigned matchVBRBits(uint64_t Val, unsigned Width) { if (Width == 0) return 0; unsigned NumBits = 0; while (1) { @@ -425,9 +425,9 @@ class BlockAbbrevs { // to a record. 
AbbrevLookupSizeMap LookupMap; - void PrintLookupMap(raw_ostream &Stream) { + void printLookupMap(raw_ostream &Stream) const { Stream << "------------------------------\n"; - Stream << "Block " << GetBlockID() << " abbreviation tries:\n"; + Stream << "Block " << getBlockID() << " abbreviation tries:\n"; bool IsFirstIteration = true; for (AbbrevLookupSizeMap::const_iterator Iter = LookupMap.begin(), IterEnd = LookupMap.end(); @@ -450,6 +450,17 @@ class BlockAbbrevs { typedef DenseMap BlockAbbrevsMapType; typedef std::pair BlockAbbrevsMapElmtType; +/// Gets the corresponding block abbreviations for a block ID. +static BlockAbbrevs *getAbbrevs(BlockAbbrevsMapType &AbbrevsMap, + unsigned BlockID) { + BlockAbbrevs *Abbrevs = AbbrevsMap[BlockID]; + if (Abbrevs == nullptr) { + Abbrevs = new BlockAbbrevs(BlockID); + AbbrevsMap[BlockID] = Abbrevs; + } + return Abbrevs; +} + /// Parses the bitcode file, analyzes it, and generates the /// corresponding lists of global abbreviations to use in the /// generated (compressed) bitcode file. @@ -503,7 +514,7 @@ class NaClBlockAnalyzeParser : public NaClBitcodeParser { NaClBlockAnalyzeParser(unsigned BlockID, NaClAnalyzeParser *Context) : NaClBitcodeParser(BlockID, Context), Context(Context) { - Init(); + init(); } virtual ~NaClBlockAnalyzeParser() { @@ -518,7 +529,7 @@ class NaClBlockAnalyzeParser : public NaClBitcodeParser { NaClBlockAnalyzeParser *EnclosingParser) : NaClBitcodeParser(BlockID, EnclosingParser), Context(EnclosingParser->Context) { - Init(); + init(); } public: @@ -534,18 +545,18 @@ class NaClBlockAnalyzeParser : public NaClBitcodeParser { if (Record.UsedAnAbbreviation()) { unsigned AbbrevIndex = Record.GetAbbreviationIndex(); if (LocalAbbrevBitstreamToInternalMap. - DefinesBitstreamAbbrevIndex(AbbrevIndex)) { + definesBitstreamAbbrevIndex(AbbrevIndex)) { Record.SetAbbreviationIndex( LocalAbbrevBitstreamToInternalMap. 
- GetInternalAbbrevIndex(AbbrevIndex)); + getInternalAbbrevIndex(AbbrevIndex)); } else { AbbrevBitstreamToInternalMap &GlobalAbbrevBitstreamToInternalMap = - GlobalBlockAbbrevs->GetGlobalAbbrevBitstreamToInternalMap(); + GlobalBlockAbbrevs->getGlobalAbbrevBitstreamToInternalMap(); if (GlobalAbbrevBitstreamToInternalMap. - DefinesBitstreamAbbrevIndex(AbbrevIndex)) { + definesBitstreamAbbrevIndex(AbbrevIndex)) { Record.SetAbbreviationIndex( GlobalAbbrevBitstreamToInternalMap. - GetInternalAbbrevIndex(AbbrevIndex)); + getInternalAbbrevIndex(AbbrevIndex)); } else { report_fatal_error("Bad abbreviation index in file"); } @@ -561,12 +572,12 @@ class NaClBlockAnalyzeParser : public NaClBitcodeParser { NaClBitCodeAbbrev *Abbrev, bool IsLocal) { int Index; - AddAbbreviation(BlockID, Abbrev->Simplify(), Index); + addAbbreviation(BlockID, Abbrev->Simplify(), Index); if (IsLocal) { - LocalAbbrevBitstreamToInternalMap.InstallNewBitstreamAbbrevIndex(Index); + LocalAbbrevBitstreamToInternalMap.installNewBitstreamAbbrevIndex(Index); } else { - GetGlobalAbbrevs(BlockID)->GetGlobalAbbrevBitstreamToInternalMap(). - InstallNewBitstreamAbbrevIndex(Index); + getGlobalAbbrevs(BlockID)->getGlobalAbbrevBitstreamToInternalMap(). + installNewBitstreamAbbrevIndex(Index); } } @@ -582,44 +593,35 @@ class NaClBlockAnalyzeParser : public NaClBitcodeParser { /// Returns the set of (global) block abbreviations defined for the /// given block ID. - BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) { - BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID]; - if (Abbrevs == 0) { - Abbrevs = new BlockAbbrevs(BlockID); - Context->BlockAbbrevsMap[BlockID] = Abbrevs; - } - return Abbrevs; - } - - void SetGlobalAbbrevs(unsigned BlockID, BlockAbbrevs *Abbrevs) { - Context->BlockAbbrevsMap[BlockID] = Abbrevs; + BlockAbbrevs *getGlobalAbbrevs(unsigned BlockID) { + return getAbbrevs(Context->BlockAbbrevsMap, BlockID); } // Adds the abbreviation to the list of abbreviations for the given // block. 
- void AddAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev, + void addAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev, int &Index) { // Get block abbreviations. - BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(BlockID); + BlockAbbrevs* Abbrevs = getGlobalAbbrevs(BlockID); // Read abbreviation and add as a global abbreviation. - if (Abbrevs->AddAbbreviation(Abbrev, Index) + if (Abbrevs->addAbbreviation(Abbrev, Index) && Context->Flags.TraceGeneratedAbbreviations) { - PrintAbbrev(errs(), BlockID, Abbrev); + printAbbrev(errs(), BlockID, Abbrev); } } /// Finds the index to the corresponding internal block abbreviation /// for the given abbreviation. - int FindAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { - return GetGlobalAbbrevs(BlockID)->FindAbbreviation(Abbrev); + int findAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { + return getGlobalAbbrevs(BlockID)->findAbbreviation(Abbrev); } - void Init() { - GlobalBlockAbbrevs = GetGlobalAbbrevs(GetBlockID()); - LocalAbbrevBitstreamToInternalMap.SetNextBitstreamAbbrevIndex( + void init() { + GlobalBlockAbbrevs = getGlobalAbbrevs(GetBlockID()); + LocalAbbrevBitstreamToInternalMap.setNextBitstreamAbbrevIndex( GlobalBlockAbbrevs-> - GetGlobalAbbrevBitstreamToInternalMap().GetNextBitstreamAbbrevIndex()); + getGlobalAbbrevBitstreamToInternalMap().getNextBitstreamAbbrevIndex()); } }; @@ -663,12 +665,12 @@ class UnrolledAbbreviation { bool CanBeBigger = false) : CodeOp(0) { unsigned NextOp = 0; - UnrollAbbrevOp(CodeOp, Abbrev, NextOp); + unrollAbbrevOp(CodeOp, Abbrev, NextOp); --Size; for (unsigned i = 0; i < Size; ++i) { // Create slot and then fill with appropriate operator. 
AbbrevOps.push_back(CodeOp); - UnrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp); + unrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp); } if (CanBeBigger) { for (; NextOp < Abbrev->getNumOperandInfos(); ++NextOp) { @@ -690,8 +692,8 @@ class UnrolledAbbreviation { /// Prints out the abbreviation modeled by the unrolled /// abbreviation. - void Print(raw_ostream &Stream) const { - NaClBitCodeAbbrev *Abbrev = Restore(false); + void print(raw_ostream &Stream) const { + NaClBitCodeAbbrev *Abbrev = restore(false); Abbrev->Print(Stream); Abbrev->dropRef(); } @@ -699,7 +701,7 @@ class UnrolledAbbreviation { /// Converts the unrolled abbreviation back into a regular /// abbreviation. If Simplify is true, we simplify the /// unrolled abbreviation as well. - NaClBitCodeAbbrev *Restore(bool Simplify=true) const { + NaClBitCodeAbbrev *restore(bool Simplify=true) const { NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev(); Abbrev->Add(CodeOp); for (std::vector::const_iterator @@ -733,7 +735,7 @@ class UnrolledAbbreviation { // Extracts out the next abbreviation operator from the abbreviation // Abbrev, given the index NextOp, and assigns it to AbbrevOp - void UnrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp, + void unrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp, NaClBitCodeAbbrev *Abbrev, unsigned &NextOp) { assert(NextOp < Abbrev->getNumOperandInfos()); @@ -776,25 +778,25 @@ class CandBlockAbbrev { } /// The block ID of the candidate abbreviation. - unsigned GetBlockID() const { + unsigned getBlockID() const { return BlockID; } /// The abbreviation of the candidate abbreviation. - const NaClBitCodeAbbrev *GetAbbrev() const { + const NaClBitCodeAbbrev *getAbbrev() const { return Abbrev; } /// orders this against the candidate abbreviation. 
- int Compare(const CandBlockAbbrev &CandAbbrev) const { + int compare(const CandBlockAbbrev &CandAbbrev) const { unsigned diff = BlockID - CandAbbrev.BlockID; if (diff) return diff; return Abbrev->Compare(*CandAbbrev.Abbrev); } /// Prints the candidate abbreviation to the given stream. - void Print(raw_ostream &Stream) const { - PrintAbbrev(Stream, BlockID, Abbrev); + void print(raw_ostream &Stream) const { + printAbbrev(Stream, BlockID, Abbrev); } private: @@ -806,7 +808,7 @@ class CandBlockAbbrev { static inline bool operator<(const CandBlockAbbrev &A1, const CandBlockAbbrev &A2) { - return A1.Compare(A2) < 0; + return A1.compare(A2) < 0; } /// Models the set of candidate abbreviations being considered, and @@ -835,22 +837,22 @@ class CandidateAbbrevs { /// instances expected to use this candidate abbreviation. Returns /// true if the corresponding candidate abbreviation is added to this /// set of candidate abbreviations. - bool Add(unsigned BlockID, + bool add(unsigned BlockID, UnrolledAbbreviation &UnrolledAbbrev, unsigned NumInstances); /// Returns the list of candidate abbreviations in this set. - const AbbrevCountMap &GetAbbrevsMap() const { + const AbbrevCountMap &getAbbrevsMap() const { return AbbrevsMap; } /// Prints out the current contents of this set. 
- void Print(raw_ostream &Stream, const char *Title = "Candidates") const { + void print(raw_ostream &Stream, const char *Title = "Candidates") const { Stream << "-- " << Title << ": \n"; for (const_iterator Iter = AbbrevsMap.begin(), IterEnd = AbbrevsMap.end(); Iter != IterEnd; ++Iter) { Stream << format("%12u", Iter->second) << ": "; - Iter->first.Print(Stream); + Iter->first.print(Stream); } Stream << "--\n"; } @@ -863,13 +865,13 @@ class CandidateAbbrevs { BlockAbbrevsMapType &BlockAbbrevsMap; }; -bool CandidateAbbrevs::Add(unsigned BlockID, +bool CandidateAbbrevs::add(unsigned BlockID, UnrolledAbbreviation &UnrolledAbbrev, unsigned NumInstances) { // Drop if it corresponds to an existing global abbreviation. - NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.Restore(); + NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.restore(); if (BlockAbbrevs* Abbrevs = BlockAbbrevsMap[BlockID]) { - if (Abbrevs->FindAbbreviation(Abbrev) != + if (Abbrevs->findAbbreviation(Abbrev) != BlockAbbrevs::NO_SUCH_ABBREVIATION) { Abbrev->dropRef(); return false; @@ -892,7 +894,7 @@ bool CandidateAbbrevs::Add(unsigned BlockID, // values at Index are distributed. Any found abbreviations are added // to the candidate abbreviations CandAbbrevs. Returns true only if we // have added new candidate abbreviations to CandAbbrevs. 
-static bool AddNewAbbreviations(unsigned BlockID, +static bool addNewAbbreviations(unsigned BlockID, const UnrolledAbbreviation &Abbrev, unsigned Index, NaClBitcodeValueDist &ValueDist, @@ -921,7 +923,7 @@ static bool AddNewAbbreviations(unsigned BlockID, NaClBitcodeDistElement *Elmt = ValueDist.at(Range.first); UnrolledAbbreviation CandAbbrev(Abbrev); CandAbbrev.AbbrevOps[Index] = NaClBitCodeAbbrevOp(Range.first); - return CandAbbrevs.Add(BlockID, CandAbbrev, Elmt->GetNumInstances()); + return CandAbbrevs.add(BlockID, CandAbbrev, Elmt->GetNumInstances()); } return false; } @@ -931,7 +933,7 @@ static bool AddNewAbbreviations(unsigned BlockID, // corresponding distribution of value indices associated with the // abbreviation. Any found abbreviations are added to the candidate // abbreviations CandAbbrevs. -static void AddNewAbbreviations( +static void addNewAbbreviations( const NaClBitcodeCompressor::CompressFlags &Flags, unsigned BlockID, const UnrolledAbbreviation &Abbrev, NaClBitcodeDist &IndexDist, CandidateAbbrevs &CandAbbrevs) { @@ -944,7 +946,7 @@ static void AddNewAbbreviations( IndexIterEnd = IndexDistribution->end(); IndexIter != IndexIterEnd; ++IndexIter) { unsigned Index = static_cast(IndexIter->second); - if (AddNewAbbreviations( + if (addNewAbbreviations( BlockID, Abbrev, Index, cast(IndexDist.at(Index)) ->GetValueDist(), @@ -959,7 +961,7 @@ static void AddNewAbbreviations( // SizeDist is the corresponding distribution of sizes associated with // the abbreviation. Any found abbreviations are added to the // candidate abbreviations CandAbbrevs. -static void AddNewAbbreviations( +static void addNewAbbreviations( const NaClBitcodeCompressor::CompressFlags &Flags, unsigned BlockID, NaClBitCodeAbbrev *Abbrev, unsigned Code, NaClBitcodeDist &SizeDist, CandidateAbbrevs &CandAbbrevs) { @@ -976,11 +978,11 @@ static void AddNewAbbreviations( // Try making the code a literal. 
UnrolledAbbreviation CandAbbrev(UnrolledAbbrev); CandAbbrev.CodeOp = NaClBitCodeAbbrevOp(Code); - CandAbbrevs.Add(BlockID, CandAbbrev, + CandAbbrevs.add(BlockID, CandAbbrev, SizeDist.at(Size)->GetNumInstances()); } // Now process value indices to find candidate abbreviations. - AddNewAbbreviations(Flags, BlockID, UnrolledAbbrev, + addNewAbbreviations(Flags, BlockID, UnrolledAbbrev, cast(SizeDist.at(Size)) ->GetValueIndexDist(), CandAbbrevs); @@ -992,7 +994,7 @@ static void AddNewAbbreviations( // BlockID. AbbrevDist is the distribution map of abbreviations // associated with BlockID. Any found abbreviations are added to the // candidate abbreviations CandAbbrevs. -static void AddNewAbbreviations( +static void addNewAbbreviations( const NaClBitcodeCompressor::CompressFlags &Flags, unsigned BlockID, BlockAbbrevs *Abbrevs, NaClBitcodeDist &AbbrevDist, CandidateAbbrevs &CandAbbrevs) { @@ -1003,7 +1005,7 @@ static void AddNewAbbreviations( AbbrevIterEnd = AbbrevDistribution->end(); AbbrevIter != AbbrevIterEnd; ++AbbrevIter) { NaClBitcodeDistValue AbbrevIndex = AbbrevIter->second; - NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(AbbrevIndex); + NaClBitCodeAbbrev *Abbrev = Abbrevs->getIndexedAbbrev(AbbrevIndex); NaClBitcodeAbbrevDistElement *AbbrevElmt = cast(AbbrevDist.at(AbbrevIndex)); NaClBitcodeDist &CodeDist = AbbrevElmt->GetCodeDist(); @@ -1015,7 +1017,7 @@ static void AddNewAbbreviations( CodeIterEnd = CodeDistribution->end(); CodeIter != CodeIterEnd; ++CodeIter) { unsigned Code = static_cast(CodeIter->second); - AddNewAbbreviations( + addNewAbbreviations( Flags, BlockID, Abbrev, Code, cast(CodeDist.at(CodeIter->second)) ->GetSizeDist(), @@ -1030,7 +1032,7 @@ typedef std::pair CountedAbbrevType; // BlockDist. BlockAbbrevsMap contains the set of read global // abbreviations. Adds found candidate abbreviations to the set of // known global abbreviations. 
-static void AddNewAbbreviations( +static void addNewAbbreviations( const NaClBitcodeCompressor::CompressFlags &Flags, NaClBitcodeBlockDist &BlockDist, BlockAbbrevsMapType &BlockAbbrevsMap) { CandidateAbbrevs CandAbbrevs(BlockAbbrevsMap); @@ -1041,7 +1043,7 @@ static void AddNewAbbreviations( Iter = Distribution->begin(), IterEnd = Distribution->end(); Iter != IterEnd; ++Iter) { unsigned BlockID = static_cast(Iter->second); - AddNewAbbreviations( + addNewAbbreviations( Flags, BlockID, BlockAbbrevsMap[BlockID], cast(BlockDist.at(BlockID)) ->GetAbbrevDist(), @@ -1060,20 +1062,20 @@ static void AddNewAbbreviations( // to 1000 records. Assuming that both abbreviations shrink the // record by the same number of bits, we clearly want the tool to // choose the second abbreviation when selecting the abbreviation - // index to choose (via method GetRecordAbbrevIndex). + // index to choose (via method getRecordAbbrevIndex). // // Selecting the second is important in that abbreviation are // refined by successive calls to this tool. We do not want to // restrict downstream refinements prematurely. Hence, we want the // tool to choose the abbreviation with the most candidates first. // - // Since method GetRecordAbbrevIndex chooses the first abbreviation + // Since method getRecordAbbrevIndex chooses the first abbreviation // that generates the least number of bits, we clearly want to make // sure that the second abbreviation occurs before the first. 
std::vector Candidates; for (CandidateAbbrevs::const_iterator - Iter = CandAbbrevs.GetAbbrevsMap().begin(), - IterEnd = CandAbbrevs.GetAbbrevsMap().end(); + Iter = CandAbbrevs.getAbbrevsMap().begin(), + IterEnd = CandAbbrevs.getAbbrevsMap().end(); Iter != IterEnd; ++Iter) { Candidates.push_back(CountedAbbrevType(Iter->second,Iter->first)); } @@ -1088,14 +1090,14 @@ static void AddNewAbbreviations( unsigned Min = (Iter->first >> 2); for (; Iter != IterEnd; ++Iter) { if (Iter->first < Min) break; - unsigned BlockID = Iter->second.GetBlockID(); + unsigned BlockID = Iter->second.getBlockID(); BlockAbbrevs *Abbrevs = BlockAbbrevsMap[BlockID]; - NaClBitCodeAbbrev *Abbrev = Iter->second.GetAbbrev()->Copy(); + NaClBitCodeAbbrev *Abbrev = Iter->second.getAbbrev()->Copy(); if (Flags.TraceGeneratedAbbreviations) { errs() <first) << ": "; - PrintAbbrev(errs(), BlockID, Abbrev); + printAbbrev(errs(), BlockID, Abbrev); } - Abbrevs->AddAbbreviation(Abbrev); + Abbrevs->addAbbreviation(Abbrev); } if (Flags.TraceGeneratedAbbreviations) { errs() << "--\n"; @@ -1105,7 +1107,7 @@ static void AddNewAbbreviations( // Walks the block distribution (BlockDist), sorting entries based // on the distribution of blocks and abbreviations, and then // prints out the frequency of each abbreviation used. -static void DisplayAbbreviationFrequencies( +static void displayAbbreviationFrequencies( raw_ostream &Output, const NaClBitcodeBlockDist &BlockDist, const BlockAbbrevsMapType &BlockAbbrevsMap) { @@ -1133,7 +1135,7 @@ static void DisplayAbbreviationFrequencies( unsigned Count = AbbrevDist.at(Index)->GetNumInstances(); Output << format("%8u (%6.2f%%): ", Count, (double) Count/Total*100.0); - BlockPos->second->GetIndexedAbbrev(Index)->Print(Output); + BlockPos->second->getIndexedAbbrev(Index)->Print(Output); } } Output << '\n'; @@ -1144,7 +1146,7 @@ static void DisplayAbbreviationFrequencies( // to use, from memory buffer MemBuf containing the input bitcode file. 
// If ShowAbbreviationFrequencies or Flag.ShowValueDistributions are // defined, the corresponding results is printed to Output. -static bool AnalyzeBitcode( +static bool analyzeBitcode( const NaClBitcodeCompressor::CompressFlags &Flags, MemoryBuffer *MemBuf, raw_ostream &Output, @@ -1173,18 +1175,241 @@ static bool AnalyzeBitcode( } if (Flags.ShowAbbreviationFrequencies) - DisplayAbbreviationFrequencies(Output, Parser.BlockDist, BlockAbbrevsMap); + displayAbbreviationFrequencies(Output, Parser.BlockDist, BlockAbbrevsMap); if (Flags.ShowValueDistributions) Parser.BlockDist.Print(Output); - AddNewAbbreviations(Flags, Parser.BlockDist, BlockAbbrevsMap); + addNewAbbreviations(Flags, Parser.BlockDist, BlockAbbrevsMap); + return false; +} + +/// Models a queue of selected abbreviation indices, for each record +/// in all instances of a given block. Elements are added in the order +/// they appear in the bitcode file. +/// +/// The goal is to remove abbreviations that are not really used, from +/// the list of candidate abbrevations, reducing the number of +/// abbreviations needed. This is important because as the number of +/// abbreviations increase, the number of bits needed to store the +/// abbreviations also increase. By removing unnecessary +/// abbreviations, this improves the ability of this executable to +/// compress the bitcode file. +class SelectedAbbrevsQueue { + SelectedAbbrevsQueue(const SelectedAbbrevsQueue &) = delete; + SelectedAbbrevsQueue &operator=(const SelectedAbbrevsQueue &) = delete; + + // The minimum number of times an abbreviation must be used in the + // compressed version of the bitcode file, if it is going to be used + // at all. + static const uint32_t MinUsageCount = 5; + +public: + SelectedAbbrevsQueue() : AbbrevsIndexQueueFront(0) {} + + /// Adds the given selected abbreviation index to the end of the + /// queue. 
+ void addIndex(unsigned Index) { AbbrevsIndexQueue.push_back(Index); } + + /// Removes the next selected abbreviation index from the + /// queue. + unsigned removeIndex() { + assert(AbbrevsIndexQueueFront < AbbrevsIndexQueue.size()); + return AbbrevsIndexQueue[AbbrevsIndexQueueFront++]; + } + + /// Takes the abbreviation indices on the queue, determines what + /// subset of abbreviations should be kept, and puts them on the + /// list of abbreviations defined by getKeptAbbrevs. Updates the + /// abbreviation idices on the queue to match the corresponding kept + /// abbreviations. + /// + /// Should be called after the last call to AddIndex, and before + /// calling removeIndex. + void installFrequentlyUsedAbbrevs(BlockAbbrevs *Abbrevs); + + /// Return the list of kept abbreviations, for the corresponding + /// block, in the order they should be defined. + const std::vector &getKeptAbbrevs() const { + return KeptAbbrevs; + } + + /// Returns the maximum abbreviation index used by the kept + /// abbreviations. + unsigned getMaxKeptAbbrevIndex() const { + return KeptAbbrevs.size() + naclbitc::DEFAULT_MAX_ABBREV; + } + +protected: + // Defines a queue of abbreviations indices using a + // vector. AbbrevsIndexQueueFront is used to point to the front of + // the queue. + std::vector AbbrevsIndexQueue; + unsigned AbbrevsIndexQueueFront; + // The list of abbreviations that should be defined for the block, + // in the order they should be defined. + std::vector KeptAbbrevs; +}; + +void SelectedAbbrevsQueue::installFrequentlyUsedAbbrevs(BlockAbbrevs *Abbrevs) { + // Start by collecting usage counts for each abbreviation. 
+ assert(AbbrevsIndexQueueFront == 0); + assert(KeptAbbrevs.empty()); + std::map UsageMap; + for (unsigned Index : AbbrevsIndexQueue) { + if (Index != naclbitc::UNABBREV_RECORD) + ++UsageMap[Index]; + } + + // Install results + std::map KeepIndexMap; + for (const std::pair &Pair : UsageMap) { + if (Pair.second >= MinUsageCount) { + KeepIndexMap[Pair.first] = + KeptAbbrevs.size() + naclbitc::FIRST_APPLICATION_ABBREV; + KeptAbbrevs.push_back(Abbrevs->getIndexedAbbrev(Pair.first)); + } + } + + // Update the queue of selected abbreviation indices to match kept + // abbreviations. + for (unsigned &AbbrevIndex : AbbrevsIndexQueue) { + std::map::const_iterator NewPos = + KeepIndexMap.find(AbbrevIndex); + AbbrevIndex = NewPos == KeepIndexMap.end() ? naclbitc::UNABBREV_RECORD + : NewPos->second; + } +} + +/// Models the kept queue of abbreviations associated with each block ID. +typedef std::map BlockAbbrevsQueueMap; +typedef std::pair BlockAbbrevsQueueMapElmt; + +/// Installs frequently used abbreviations for each of the blocks +/// defined in AbbrevsQueueMap, based on the abbreviations in the +/// corresponding AbbrevsMap +static void +installFrequentlyUsedAbbrevs(BlockAbbrevsMapType &AbbrevsMap, + BlockAbbrevsQueueMap &AbbrevsQueueMap) { + for (const BlockAbbrevsQueueMapElmt &Pair : AbbrevsQueueMap) { + if (SelectedAbbrevsQueue *SelectedAbbrevs = Pair.second) + SelectedAbbrevs->installFrequentlyUsedAbbrevs(AbbrevsMap[Pair.first]); + } +} + +/// Top level parser to queue selected abbreviation indices (within +/// the bitcode file), so that we can remove unused abbreviations from +/// the collected list of abbreviations before generating the final, +/// compressed bitcode file. 
+class NaClAssignAbbrevsParser : public NaClBitcodeParser { +public: + NaClAssignAbbrevsParser(NaClBitstreamCursor &Cursor, + BlockAbbrevsMapType &AbbrevsMap, + BlockAbbrevsQueueMap &AbbrevsQueueMap) + : NaClBitcodeParser(Cursor), AbbrevsMap(AbbrevsMap), + AbbrevsQueueMap(AbbrevsQueueMap) {} + + ~NaClAssignAbbrevsParser() final {} + + bool ParseBlock(unsigned BlockID) final; + + /// The abbreviations to use for the compressed bitcode. + BlockAbbrevsMapType &AbbrevsMap; + + /// The corresponding selected abbreviation indices to for each + /// block. + BlockAbbrevsQueueMap &AbbrevsQueueMap; +}; + +/// Block parser to queue abbreviation indices used by the records in +/// the block. +class NaClAssignAbbrevsBlockParser : public NaClBitcodeParser { +public: + NaClAssignAbbrevsBlockParser(unsigned BlockID, + NaClAssignAbbrevsParser *Context) + : NaClBitcodeParser(BlockID, Context), BlockID(BlockID), + Context(Context) { + init(); + } + + ~NaClAssignAbbrevsBlockParser() final {} + +protected: + unsigned BlockID; + NaClAssignAbbrevsParser *Context; + // Cached abbreviations defined for this block,. + BlockAbbrevs *Abbreviations; + // Cached abbreviations queue to add abbreviation indices to. + SelectedAbbrevsQueue *Queue; + + NaClAssignAbbrevsBlockParser(unsigned BlockID, + NaClAssignAbbrevsBlockParser *EnclosingParser) + : NaClBitcodeParser(BlockID, EnclosingParser), BlockID(BlockID), + Context(EnclosingParser->Context) { + init(); + } + + void init() { + Abbreviations = getAbbrevs(Context->AbbrevsMap, BlockID); + Queue = Context->AbbrevsQueueMap[BlockID]; + if (Queue == nullptr) { + Queue = new SelectedAbbrevsQueue(); + Context->AbbrevsQueueMap[BlockID] = Queue; + } + } + + bool ParseBlock(unsigned BlockID) final { + NaClAssignAbbrevsBlockParser Parser(BlockID, this); + return Parser.ParseThisBlock(); + } + + void ProcessRecord() final { + // Find best fitting abbreviation to use, and save. 
+ Queue->addIndex( + Abbreviations->getRecordAbbrevIndex(Record.GetRecordData())); + } +}; + +bool NaClAssignAbbrevsParser::ParseBlock(unsigned BlockID) { + NaClAssignAbbrevsBlockParser Parser(BlockID, this); + return Parser.ParseThisBlock(); +} + +// Reads the bitcode in MemBuf, using the abbreviations in AbbrevsMap, +// and queues the selected abbrevations for each record into +// AbbrevsQueueMap. +static bool chooseAbbrevs(MemoryBuffer *MemBuf, BlockAbbrevsMapType &AbbrevsMap, + BlockAbbrevsQueueMap &AbbrevsQueueMap) { + const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart(); + const unsigned char *EndBufPtr = BufPtr + MemBuf->getBufferSize(); + + // Read header. No verification is needed since AnalyzeBitcode has + // already checked it. + NaClBitcodeHeader Header; + if (Header.Read(BufPtr, EndBufPtr)) + return Error("Invalid PNaCl bitcode header"); + + // Create the bitcode reader. + NaClBitstreamReader StreamFile(BufPtr, EndBufPtr); + NaClBitstreamCursor Stream(StreamFile); + + // Set up the parser. + NaClAssignAbbrevsParser Parser(Stream, AbbrevsMap, AbbrevsQueueMap); + + // Parse the bitcode and choose abbreviations for records. + while (!Stream.AtEndOfStream()) { + if (Parser.Parse()) { + installFrequentlyUsedAbbrevs(AbbrevsMap, AbbrevsQueueMap); + return true; + } + } + installFrequentlyUsedAbbrevs(AbbrevsMap, AbbrevsQueueMap); return false; } /// Parses the input bitcode file and generates the corresponding /// compressed bitcode file, by replacing abbreviations in the input /// file with the corresponding abbreviations defined in -/// BlockAbbrevsMap. +/// BlockAbbrevsMap using the selected abbreviations in AbbrevsQueueMap. 
class NaClBitcodeCopyParser : public NaClBitcodeParser { public: // Top-level constructor to build the appropriate block parser @@ -1192,21 +1417,24 @@ class NaClBitcodeCopyParser : public NaClBitcodeParser { NaClBitcodeCopyParser(const NaClBitcodeCompressor::CompressFlags &Flags, NaClBitstreamCursor &Cursor, BlockAbbrevsMapType &BlockAbbrevsMap, + BlockAbbrevsQueueMap &AbbrevsQueueMap, NaClBitstreamWriter &Writer) - : NaClBitcodeParser(Cursor), - Flags(Flags), - BlockAbbrevsMap(BlockAbbrevsMap), + : NaClBitcodeParser(Cursor), Flags(Flags), + BlockAbbrevsMap(BlockAbbrevsMap), AbbrevsQueueMap(AbbrevsQueueMap), Writer(Writer) {} virtual ~NaClBitcodeCopyParser() {} - virtual bool ParseBlock(unsigned BlockID); + bool ParseBlock(unsigned BlockID) final; const NaClBitcodeCompressor::CompressFlags &Flags; // The abbreviations to use for the copied bitcode. BlockAbbrevsMapType &BlockAbbrevsMap; + // Map defining the selected abbreviations for each block. + BlockAbbrevsQueueMap &AbbrevsQueueMap; + // The bitstream to copy the compressed bitcode into. NaClBitstreamWriter &Writer; }; @@ -1214,12 +1442,11 @@ class NaClBitcodeCopyParser : public NaClBitcodeParser { class NaClBlockCopyParser : public NaClBitcodeParser { public: // Top-level constructor to build the appropriate block parser. - NaClBlockCopyParser(unsigned BlockID, - NaClBitcodeCopyParser *Context) - : NaClBitcodeParser(BlockID, Context), - Context(Context), - BlockAbbreviations(0) - {} + NaClBlockCopyParser(unsigned BlockID, NaClBitcodeCopyParser *Context) + : NaClBitcodeParser(BlockID, Context), Context(Context), + BlockAbbreviations(nullptr), SelectedAbbrevs(nullptr) { + init(); + } virtual ~NaClBlockCopyParser() {} @@ -1231,24 +1458,31 @@ class NaClBlockCopyParser : public NaClBitcodeParser { // EnterBlock). BlockAbbrevs *BlockAbbreviations; + // The corresonding selected abbreviations. + SelectedAbbrevsQueue *SelectedAbbrevs; + /// Constructor to parse nested blocks. 
Creates a block parser to /// parse in a block with the given BlockID, and write the block /// back out using the abbreviations in BlockAbbrevsMap. NaClBlockCopyParser(unsigned BlockID, NaClBlockCopyParser *EnclosingParser) : NaClBitcodeParser(BlockID, EnclosingParser), - Context(EnclosingParser->Context) - {} + Context(EnclosingParser->Context), BlockAbbreviations(nullptr), + SelectedAbbrevs(nullptr) { + init(); + } + + void init() { + unsigned BlockID = GetBlockID(); + BlockAbbreviations = getGlobalAbbrevs(BlockID); + SelectedAbbrevs = Context->AbbrevsQueueMap[BlockID]; + assert(SelectedAbbrevs); + } /// Returns the set of (global) block abbreviations defined for the /// given block ID. - BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) { - BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID]; - if (Abbrevs == 0) { - Abbrevs = new BlockAbbrevs(BlockID); - Context->BlockAbbrevsMap[BlockID] = Abbrevs; - } - return Abbrevs; + BlockAbbrevs *getGlobalAbbrevs(unsigned BlockID) { + return getAbbrevs(Context->BlockAbbrevsMap, BlockID); } virtual bool ParseBlock(unsigned BlockID) { @@ -1257,14 +1491,13 @@ class NaClBlockCopyParser : public NaClBitcodeParser { } virtual void EnterBlock(unsigned NumWords) { - unsigned BlockID = GetBlockID(); - BlockAbbreviations = GetGlobalAbbrevs(BlockID); - // Enter the subblock. - NaClBitcodeSelectorAbbrev - Selector(BlockAbbreviations->GetNumberAbbreviations()-1); + NaClBitcodeSelectorAbbrev Selector( + SelectedAbbrevs->getMaxKeptAbbrevIndex()); if (Context->Flags.RemoveAbbreviations) Selector = NaClBitcodeSelectorAbbrev(); + + unsigned BlockID = GetBlockID(); Context->Writer.EnterSubblock(BlockID, Selector); if (BlockID != naclbitc::MODULE_BLOCK_ID @@ -1274,22 +1507,18 @@ class NaClBlockCopyParser : public NaClBitcodeParser { // To keep things simple, we dump all abbreviations immediately // inside the module block. Start by dumping module abbreviations // as local abbreviations. 
- BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(naclbitc::MODULE_BLOCK_ID); - for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation(); - i < Abbrevs->GetNumberAbbreviations(); ++i) { - Context->Writer.EmitAbbrev(Abbrevs->GetIndexedAbbrev(i)->Copy()); + for (auto Abbrev : SelectedAbbrevs->getKeptAbbrevs()) { + Context->Writer.EmitAbbrev(Abbrev->Copy()); } // Insert the block info block, if needed, so that nested blocks // will have defined abbreviations. bool HasNonmoduleAbbrevs = false; - for (const BlockAbbrevsMapElmtType &Pair : Context->BlockAbbrevsMap) { - if (Pair.second == nullptr || - !Pair.second->hasApplicationAbbreviations()) continue; - if (Pair.first != naclbitc::MODULE_BLOCK_ID) { - HasNonmoduleAbbrevs = true; - break; - } + for (const BlockAbbrevsQueueMapElmt &Pair : Context->AbbrevsQueueMap) { + if (Pair.second->getKeptAbbrevs().empty()) + continue; + HasNonmoduleAbbrevs = true; + break; } if (!HasNonmoduleAbbrevs) return; @@ -1302,11 +1531,13 @@ class NaClBlockCopyParser : public NaClBitcodeParser { if (BlockID == naclbitc::MODULE_BLOCK_ID) continue; BlockAbbrevs *Abbrevs = Pair.second; - if (Abbrevs == nullptr) continue; - for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation(); - i < Abbrevs->GetNumberAbbreviations(); ++i) { - Context->Writer.EmitBlockInfoAbbrev(BlockID, - Abbrevs->GetIndexedAbbrev(i)); + if (Abbrevs == nullptr) + continue; + if (SelectedAbbrevsQueue *SelectedAbbrevs = + Context->AbbrevsQueueMap[BlockID]) { + for (auto Abbrev : SelectedAbbrevs->getKeptAbbrevs()) { + Context->Writer.EmitBlockInfoAbbrev(BlockID, Abbrev->Copy()); + } } } Context->Writer.ExitBlock(); @@ -1323,9 +1554,8 @@ class NaClBlockCopyParser : public NaClBitcodeParser { return; } // Find best fitting abbreviation to use, and print out the record - // using that abbreviations. - unsigned AbbrevIndex = - BlockAbbreviations->GetRecordAbbrevIndex(Record.GetRecordData()); + // using that abbreviation. 
+ unsigned AbbrevIndex = SelectedAbbrevs->removeIndex(); if (AbbrevIndex == naclbitc::UNABBREV_RECORD) { Context->Writer.EmitRecord(Record.GetCode(), Values, 0); } else { @@ -1342,13 +1572,13 @@ bool NaClBitcodeCopyParser::ParseBlock(unsigned BlockID) { // Read in bitcode, and write it back out using the abbreviations in // BlockAbbrevsMap, from memory buffer MemBuf containing the input // bitcode file. The bitcode is copied to Output. -static bool CopyBitcode(const NaClBitcodeCompressor::CompressFlags &Flags, - MemoryBuffer *MemBuf, - raw_ostream &Output, - BlockAbbrevsMapType &BlockAbbrevsMap) { +static bool copyBitcode(const NaClBitcodeCompressor::CompressFlags &Flags, + MemoryBuffer *MemBuf, raw_ostream &Output, + BlockAbbrevsMapType &BlockAbbrevsMap, + BlockAbbrevsQueueMap &AbbrevsQueueMap) { const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart(); - const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize(); + const unsigned char *EndBufPtr = BufPtr + MemBuf->getBufferSize(); // Read header. No verification is needed since AnalyzeBitcode has // already checked it. @@ -1362,35 +1592,37 @@ static bool CopyBitcode(const NaClBitcodeCompressor::CompressFlags &Flags, // Create the bitcode writer. SmallVector OutputBuffer; - OutputBuffer.reserve(256*1024); + OutputBuffer.reserve(256 * 1024); NaClBitstreamWriter StreamWriter(OutputBuffer); // Emit the file header. NaClWriteHeader(Header, StreamWriter); // Set up the parser. - NaClBitcodeCopyParser Parser(Flags, Stream, BlockAbbrevsMap, StreamWriter); + NaClBitcodeCopyParser Parser(Flags, Stream, BlockAbbrevsMap, AbbrevsQueueMap, + StreamWriter); // Parse the bitcode and copy. while (!Stream.AtEndOfStream()) { - if (Parser.Parse()) return true; + if (Parser.Parse()) + return true; } // Write out the copied results. 
- Output.write((char*)&OutputBuffer.front(), OutputBuffer.size()); + Output.write((char *)&OutputBuffer.front(), OutputBuffer.size()); return false; } // Build fast lookup abbreviation maps for each of the abbreviation blocks // defined in AbbrevsMap. -static void BuildAbbrevLookupMaps( +static void buildAbbrevLookupMaps( const NaClBitcodeCompressor::CompressFlags &Flags, BlockAbbrevsMapType &AbbrevsMap) { for (BlockAbbrevsMapType::const_iterator Iter = AbbrevsMap.begin(), IterEnd = AbbrevsMap.end(); Iter != IterEnd; ++Iter) { - Iter->second->BuildAbbrevLookupSizeMap(Flags); + Iter->second->buildAbbrevLookupSizeMap(Flags); } } @@ -1398,15 +1630,23 @@ static void BuildAbbrevLookupMaps( bool NaClBitcodeCompressor::analyze(MemoryBuffer *MemBuf, raw_ostream &Output) { BlockAbbrevsMapType BlockAbbrevsMap; - return !AnalyzeBitcode(Flags, MemBuf, Output, BlockAbbrevsMap); + return !analyzeBitcode(Flags, MemBuf, Output, BlockAbbrevsMap); } bool NaClBitcodeCompressor::compress(MemoryBuffer *MemBuf, raw_ostream &BitcodeOutput, raw_ostream &ShowOutput) { BlockAbbrevsMapType BlockAbbrevsMap; - if (AnalyzeBitcode(Flags, MemBuf, ShowOutput, BlockAbbrevsMap)) return false; - BuildAbbrevLookupMaps(Flags, BlockAbbrevsMap); - if (CopyBitcode(Flags, MemBuf, BitcodeOutput, BlockAbbrevsMap)) return false; - return true; + if (analyzeBitcode(Flags, MemBuf, ShowOutput, BlockAbbrevsMap)) + return false; + buildAbbrevLookupMaps(Flags, BlockAbbrevsMap); + BlockAbbrevsQueueMap AbbrevsQueueMap; + bool Result = true; + if (chooseAbbrevs(MemBuf, BlockAbbrevsMap, AbbrevsQueueMap)) + Result = false; + else if (copyBitcode(Flags, MemBuf, BitcodeOutput, BlockAbbrevsMap, + AbbrevsQueueMap)) + Result = false; + DeleteContainerSeconds(AbbrevsQueueMap); + return Result; } diff --git a/lib/Bitcode/NaCl/Analysis/NaClObjDump.cpp b/lib/Bitcode/NaCl/Analysis/NaClObjDump.cpp index d56ee291ce3..f1606596baf 100644 --- a/lib/Bitcode/NaCl/Analysis/NaClObjDump.cpp +++ b/lib/Bitcode/NaCl/Analysis/NaClObjDump.cpp 
@@ -3518,19 +3518,19 @@ bool NaClDisTopLevelParser::ParseBlock(unsigned BlockID) { namespace llvm { -bool NaClObjDump(MemoryBuffer *MemBuf, raw_ostream &Output, +bool NaClObjDump(MemoryBufferRef MemBuf, raw_ostream &Output, bool NoRecords, bool NoAssembly) { // Create objects needed to run parser. naclbitc::ObjDumpStream ObjDump(Output, !NoRecords, !NoAssembly); - if (MemBuf->getBufferSize() % 4 != 0) { + if (MemBuf.getBufferSize() % 4 != 0) { ObjDump.Error() << "Bitcode stream should be a multiple of 4 bytes in length.\n"; return true; } - const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart(); - const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize(); + const unsigned char *BufPtr = (const unsigned char *)MemBuf.getBufferStart(); + const unsigned char *EndBufPtr = BufPtr+MemBuf.getBufferSize(); const unsigned char *HeaderPtr = BufPtr; // Read header and verify it is good. diff --git a/lib/Bitcode/NaCl/TestUtils/NaClBitcodeMunge.cpp b/lib/Bitcode/NaCl/TestUtils/NaClBitcodeMunge.cpp index 029578234a3..272634654b8 100644 --- a/lib/Bitcode/NaCl/TestUtils/NaClBitcodeMunge.cpp +++ b/lib/Bitcode/NaCl/TestUtils/NaClBitcodeMunge.cpp @@ -41,9 +41,8 @@ void NaClBitcodeMunger::setupTest( DumpResults.clear(); // Throw away any previous results. 
std::string DumpBuffer; DumpStream = new raw_string_ostream(DumpResults); - SmallVector StreamBuffer; - StreamBuffer.reserve(256*1024); - NaClBitstreamWriter OutStream(StreamBuffer); + MungedInputBuffer.clear(); + NaClBitstreamWriter OutStream(MungedInputBuffer); Writer = &OutStream; if (DebugEmitRecord) { @@ -54,14 +53,13 @@ void NaClBitcodeMunger::setupTest( SetBID = -1; writeMungedData(Munges, MungesSize, AddHeader); - std::string OutBuffer; - raw_string_ostream BitcodeStrm(OutBuffer); - for (SmallVectorImpl::const_iterator - Iter = StreamBuffer.begin(), IterEnd = StreamBuffer.end(); - Iter != IterEnd; ++Iter) { - BitcodeStrm << *Iter; - } - MungedInput = MemoryBuffer::getMemBufferCopy(BitcodeStrm.str(), TestName); + // Add null terminator, so that we meet the requirements of the + // MemoryBuffer API. + MungedInputBuffer.push_back('\0'); + + MungedInput = MemoryBuffer::getMemBuffer( + StringRef(MungedInputBuffer.data(), MungedInputBuffer.size()-1), + TestName); } void NaClBitcodeMunger::cleanupTest() { @@ -401,9 +399,8 @@ bool NaClObjDumpMunger::runTestWithFlags( /// error stream (rather than buffering in DumpStream), so that /// output can be seen in gtest death test. raw_ostream &Output = RunAsDeathTest ? errs() : *DumpStream; - // TODO(jvoung,kschimpf): Should NaClObjDump take a MemoryBufferRef - // like the parser? 
- if (NaClObjDump(MungedInput.get(), Output, NoRecords, NoAssembly)) + if (NaClObjDump(MungedInput.get()->getMemBufferRef(), + Output, NoRecords, NoAssembly)) FoundErrors = true; cleanupTest(); return !FoundErrors; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 3c44e5a4b37..446881a4f5e 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -23,7 +23,6 @@ const char *Triple::getArchTypeName(ArchType Kind) { case aarch64_be: return "aarch64_be"; case arm: return "arm"; case armeb: return "armeb"; - case asmjs: return "asmjs"; case hexagon: return "hexagon"; case mips: return "mips"; case mipsel: return "mipsel"; @@ -47,6 +46,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case nvptx64: return "nvptx64"; case le32: return "le32"; case le64: return "le64"; + case asmjs: return "asmjs"; // @LOCALMOD Emscripten case amdil: return "amdil"; case amdil64: return "amdil64"; case hsail: return "hsail"; @@ -67,8 +67,6 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case aarch64: case aarch64_be: return "aarch64"; - case asmjs: return "asmjs"; - case arm: case armeb: case thumb: @@ -103,6 +101,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case le32: return "le32"; case le64: return "le64"; + case asmjs: return "asmjs"; // @LOCALMOD Emscripten + case amdil: case amdil64: return "amdil"; @@ -141,7 +141,6 @@ const char *Triple::getOSTypeName(OSType Kind) { case Darwin: return "darwin"; case DragonFly: return "dragonfly"; - case Emscripten: return "emscripten"; case FreeBSD: return "freebsd"; case IOS: return "ios"; case KFreeBSD: return "kfreebsd"; @@ -155,6 +154,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case Haiku: return "haiku"; case Minix: return "minix"; case RTEMS: return "rtems"; + case Emscripten: return "emscripten"; // @LOCALMOD Emscripten case NaCl: return "nacl"; case CNK: return "cnk"; case Bitrig: return "bitrig"; @@ -217,6 +217,7 @@ Triple::ArchType 
Triple::getArchTypeForLLVMName(StringRef Name) { .Case("le32", le32) .Case("asmjs", asmjs) .Case("le64", le64) + .Case("asmjs", asmjs) // @LOCALMOD Emscripten .Case("amdil", amdil) .Case("amdil64", amdil64) .Case("hsail", hsail) @@ -301,6 +302,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("le32", Triple::le32) .Case("asmjs", Triple::asmjs) .Case("le64", Triple::le64) + .Case("asmjs", Triple::asmjs) // @LOCALMOD Emscripten .Case("amdil", Triple::amdil) .Case("amdil64", Triple::amdil64) .Case("hsail", Triple::hsail) @@ -345,6 +347,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("haiku", Triple::Haiku) .StartsWith("minix", Triple::Minix) .StartsWith("rtems", Triple::RTEMS) + .StartsWith("emscripten", Triple::Emscripten) // @LOCALMOD Emscripten .StartsWith("nacl", Triple::NaCl) .StartsWith("cnk", Triple::CNK) .StartsWith("bitrig", Triple::Bitrig) @@ -848,9 +851,9 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::arm: case llvm::Triple::armeb: - case llvm::Triple::asmjs: case llvm::Triple::hexagon: case llvm::Triple::le32: + case llvm::Triple::asmjs: // @LOCALMOD Emscripten case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::nvptx: @@ -916,10 +919,10 @@ Triple Triple::get32BitArchVariant() const { case Triple::spir: case Triple::arm: case Triple::armeb: - case Triple::asmjs: case Triple::hexagon: case Triple::kalimba: case Triple::le32: + case Triple::asmjs: // @LOCALMOD Emscripten case Triple::mips: case Triple::mipsel: case Triple::nvptx: @@ -954,7 +957,6 @@ Triple Triple::get64BitArchVariant() const { case Triple::UnknownArch: case Triple::arm: case Triple::armeb: - case Triple::asmjs: case Triple::hexagon: case Triple::kalimba: case Triple::msp430: @@ -963,6 +965,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::thumb: case Triple::thumbeb: case Triple::xcore: + case Triple::asmjs: // @LOCALMOD Emscripten T.setArch(UnknownArch); break; @@ -1011,13 
+1014,6 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const { case llvm::Triple::Win32: // FIXME: this is invalid for WindowsCE return "cortex-a9"; - // @LOCALMOD-START - case llvm::Triple::NaCl: - // Default to armv7 unless something more specific is specified. - if (MArch == "arm") - return "cortex-a9"; - break; - // @LOCALMOD-END default: break; } @@ -1076,6 +1072,8 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const { default: return "strongarm"; } + case llvm::Triple::NaCl: + return "cortex-a8"; default: switch (getEnvironment()) { case llvm::Triple::EABIHF: diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index ada60bfd071..e0f1facac5a 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -689,8 +689,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op, SDValue &Opc) { // @LOCALMOD-START // Avoid two reg addressing mode for loads and stores - const bool restrict_addressing_modes_for_nacl = Subtarget->isTargetNaCl() && - (Op->getOpcode() == ISD::LOAD || Op->getOpcode() == ISD::STORE); + const bool restrict_addressing_modes_for_nacl = + Subtarget->isTargetNaCl() && isa(Op); // This is neither a sandboxable load nor a sandboxable store. 
if (!restrict_addressing_modes_for_nacl) { // @LOCALMOD-END @@ -861,8 +861,8 @@ bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, // @LOCALMOD-BEGIN // Avoid two reg addressing mode for loads and stores - const bool restrict_addressing_modes_for_nacl = Subtarget->isTargetNaCl() && - (Op->getOpcode() == ISD::LOAD || Op->getOpcode() == ISD::STORE); + const bool restrict_addressing_modes_for_nacl = + Subtarget->isTargetNaCl() && isa(Op); // @LOCALMOD-END Offset = N; @@ -945,8 +945,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Opc) { // @LOCALMOD-START // Avoid two reg addressing mode for loads and stores - const bool restrict_addressing_modes_for_nacl = Subtarget->isTargetNaCl() && - (Op->getOpcode() == ISD::LOAD || Op->getOpcode() == ISD::STORE); + const bool restrict_addressing_modes_for_nacl = + Subtarget->isTargetNaCl() && isa(Op); if (!restrict_addressing_modes_for_nacl) { // @LOCALMOD-END if (N.getOpcode() == ISD::SUB) { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 337a4ad6242..c149f8bc8a8 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -7459,16 +7459,18 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // Load an immediate to varEnd. unsigned varEnd = MRI.createVirtualRegister(TRC); - if (IsThumb2) { + if (IsThumb2 || Subtarget->useMovt(*MF)) { unsigned Vtmp = varEnd; if ((LoopSize & 0xFFFF0000) != 0) Vtmp = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp) - .addImm(LoopSize & 0xFFFF)); + AddDefaultPred(BuildMI(BB, dl, + TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16), + Vtmp).addImm(LoopSize & 0xFFFF)); if ((LoopSize & 0xFFFF0000) != 0) - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) - .addReg(Vtmp).addImm(LoopSize >> 16)); + AddDefaultPred(BuildMI(BB, dl, + TII->get(IsThumb2 ? 
ARM::t2MOVTi16 : ARM::MOVTi16), + varEnd).addReg(Vtmp).addImm(LoopSize >> 16)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); @@ -10520,8 +10522,8 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, // @LOCALMOD-START // Avoid two reg addressing mode for loads and stores - const bool restrict_addressing_modes_for_nacl = Subtarget->isTargetNaCl() && - (N->getOpcode() == ISD::LOAD || N->getOpcode() == ISD::STORE); + const bool restrict_addressing_modes_for_nacl = + Subtarget->isTargetNaCl() && isa(N); if (restrict_addressing_modes_for_nacl) { return false; } @@ -10567,8 +10569,8 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return false; // @LOCALMOD-START // Avoid two reg addressing mode for loads and stores - const bool restrict_addressing_modes_for_nacl = Subtarget->isTargetNaCl() && - (N->getOpcode() == ISD::LOAD || N->getOpcode() == ISD::STORE); + const bool restrict_addressing_modes_for_nacl = + Subtarget->isTargetNaCl() && isa(N); if (restrict_addressing_modes_for_nacl) { return false; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 703e1e776e0..ce166169167 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -276,6 +276,7 @@ def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">; def IsMachO : Predicate<"Subtarget->isTargetMachO()">; def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; +def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; // @LOCALMOD def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, AssemblerPredicate<"FeatureNaClTrap", "NaCl">; def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">; @@ -4306,9 +4307,11 @@ def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), Sched<[WriteALU]>; def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 
16)), - (REV16 (LDRH addrmode3:$addr))>; + (REV16 (LDRH addrmode3:$addr))>, + Requires<[IsARM, HasV6, IsNotNaCl]>; // @LOCALMOD def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr), - (STRH (REV16 GPR:$Rn), addrmode3:$addr)>; + (STRH (REV16 GPR:$Rn), addrmode3:$addr)>, + Requires<[IsARM, HasV6, IsNotNaCl]>; // @LOCALMOD let AddedComplexity = 5 in def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 88d7ffaf10f..ea24da756df 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17,6 +17,7 @@ #include "X86CallingConv.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" +#include "X86NaClDecls.h" // @LOCALMOD #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" #include "llvm/ADT/SmallBitVector.h" @@ -3404,6 +3405,23 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) if (!ArgLocs[i].isRegLoc()) return false; + // @LOCALMOD-BEGIN + // Do not optimize vararg calls with 6 arguments. TCrets use a reduced + // subset of registers because they cannot use callee-saves. When using + // sandbox base hiding (with r11 reserved) there are not enough registers + // because all of the available registers are used for arguments (%al is + // also used for the float/vector register count). + // Another option to fix this would be to replace r11 with r10 in the + // GR64_TC register class if we decide we don't care about supporting "nest" + // parameters. It might even be technically possible to actually use r11 + // because for tcreturn it's used as a jump target and not read, but I don't + // know if reserved regs can be set differently for different purposes like + // that. 
+ if (Subtarget->isTargetNaCl64() && + FlagHideSandboxBase && + ArgLocs.size() > 5) + return false; + // @LOCALMOD-END } // If the call result is in ST0 / ST1, it needs to be popped off the x87 diff --git a/lib/Transforms/NaCl/CMakeLists.txt b/lib/Transforms/NaCl/CMakeLists.txt index b4d4835ba66..a1442467006 100644 --- a/lib/Transforms/NaCl/CMakeLists.txt +++ b/lib/Transforms/NaCl/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_library(LLVMNaClTransforms GlobalCleanup.cpp GlobalizeConstantVectors.cpp InsertDivideCheck.cpp + InternalizeUsedGlobals.cpp PNaClABISimplify.cpp PNaClSjLjEH.cpp PromoteI1Ops.cpp @@ -34,6 +35,7 @@ add_llvm_library(LLVMNaClTransforms RewriteLLVMIntrinsics.cpp RewritePNaClLibraryCalls.cpp SimplifyAllocas.cpp + SimplifyStructRegSignatures.cpp StripAttributes.cpp StripMetadata.cpp LowerEmExceptionsPass.cpp diff --git a/lib/Transforms/NaCl/ExpandVarArgs.cpp b/lib/Transforms/NaCl/ExpandVarArgs.cpp index b665a6085e9..53e262f0167 100644 --- a/lib/Transforms/NaCl/ExpandVarArgs.cpp +++ b/lib/Transforms/NaCl/ExpandVarArgs.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -45,14 +46,22 @@ INITIALIZE_PASS(ExpandVarArgs, "expand-varargs", "Expand out variable argument function definitions and calls", false, false) -static bool isEmscriptenJSArgsFunc(StringRef Name) { - return Name.equals("emscripten_asm_const_int") || - Name.equals("emscripten_asm_const_double") || - Name.equals("emscripten_landingpad") || - Name.equals("emscripten_resume"); +static bool isEmscriptenJSArgsFunc(Module *M, StringRef Name) { + // TODO(jfb) Make these intrinsics in clang and remove the assert: these + // intrinsics should only exist for Emscripten. 
+ bool isEmscriptenSpecial = Name.equals("emscripten_asm_const_int") || + Name.equals("emscripten_asm_const_double") || + Name.equals("emscripten_landingpad") || + Name.equals("emscripten_resume"); + assert(isEmscriptenSpecial ? Triple(M->getTargetTriple()).isOSEmscripten() + : true); + return isEmscriptenSpecial; } -static void ExpandVarArgFunc(Function *Func) { +static bool ExpandVarArgFunc(Module *M, Function *Func) { + if (isEmscriptenJSArgsFunc(M, Func->getName())) + return false; + Type *PtrType = Type::getInt8PtrTy(Func->getContext()); FunctionType *FTy = Func->getFunctionType(); @@ -91,6 +100,8 @@ static void ExpandVarArgFunc(Function *Func) { } } } + + return true; } static void ExpandVAArgInst(VAArgInst *Inst, DataLayout *DL) { @@ -153,20 +164,16 @@ static void ExpandVACopyInst(VACopyInst *Inst) { // ExpandVarArgCall() converts a CallInst or InvokeInst to expand out // of varargs. It returns whether the module was modified. template -static bool ExpandVarArgCall(InstType *Call, DataLayout *DL) { +static bool ExpandVarArgCall(Module *M, InstType *Call, DataLayout *DL) { FunctionType *FuncType = cast( Call->getCalledValue()->getType()->getPointerElementType()); if (!FuncType->isFunctionVarArg()) return false; - - // EMSCRIPTEN: use js varargs for special instrinsics - const Value *CV = Call->getCalledValue(); - if (isa(CV) && isEmscriptenJSArgsFunc(CV->getName())) { - return false; - } + if (auto *F = dyn_cast(Call->getCalledValue())) + if (isEmscriptenJSArgsFunc(M, F->getName())) + return false; Function *F = Call->getParent()->getParent(); - Module *M = F->getParent(); LLVMContext &Ctx = M->getContext(); SmallVector Attrs; @@ -305,18 +312,15 @@ bool ExpandVarArgs::runOnModule(Module &M) { Changed = true; ExpandVACopyInst(VAC); } else if (auto *Call = dyn_cast(I)) { - Changed |= ExpandVarArgCall(Call, &DL); + Changed |= ExpandVarArgCall(&M, Call, &DL); } else if (auto *Call = dyn_cast(I)) { - Changed |= ExpandVarArgCall(Call, &DL); + Changed |= 
ExpandVarArgCall(&M, Call, &DL); } } } - // EMSCRIPTEN: use js varargs for special instrinsics - if (F->isVarArg() && !isEmscriptenJSArgsFunc(F->getName())) { - Changed = true; - ExpandVarArgFunc(F); - } + if (F->isVarArg()) + Changed |= ExpandVarArgFunc(&M, F); } return Changed; diff --git a/lib/Transforms/NaCl/GlobalCleanup.cpp b/lib/Transforms/NaCl/GlobalCleanup.cpp index 21c3fe8b8e7..dffdc2c6caf 100644 --- a/lib/Transforms/NaCl/GlobalCleanup.cpp +++ b/lib/Transforms/NaCl/GlobalCleanup.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" @@ -72,27 +73,32 @@ static bool CleanUpLinkage(GlobalValue *GV) { bool GlobalCleanup::runOnModule(Module &M) { bool Modified = false; - if (GlobalVariable *GV = M.getNamedGlobal("llvm.compiler.used")) { + // TODO(jfb) Emscripten's JSBackend relies on llvm.used to figure out what's + // exported. Should it instead rely on visibility attributes? 
+ bool keepLLVMUsed = Triple(M.getTargetTriple()).isOSEmscripten(); + + if (auto *GV = M.getNamedGlobal("llvm.compiler.used")) { GV->eraseFromParent(); Modified = true; } -#if 0 // XXX EMSCRIPTEN: we need dis - if (GlobalVariable *GV = M.getNamedGlobal("llvm.used")) { - GV->eraseFromParent(); - Modified = true; + + if (!keepLLVMUsed) { + if (auto *GV = M.getNamedGlobal("llvm.used")) { + GV->eraseFromParent(); + Modified = true; + } } -#endif - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ) { + for (auto I = M.global_begin(), E = M.global_end(); I != E;) { GlobalVariable *GV = I++; Modified |= CleanUpLinkage(GV); } - for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { + for (auto I = M.begin(), E = M.end(); I != E;) { Function *F = I++; Modified |= CleanUpLinkage(F); } + return Modified; } diff --git a/lib/Transforms/NaCl/InternalizeUsedGlobals.cpp b/lib/Transforms/NaCl/InternalizeUsedGlobals.cpp new file mode 100644 index 00000000000..fef6fc04be3 --- /dev/null +++ b/lib/Transforms/NaCl/InternalizeUsedGlobals.cpp @@ -0,0 +1,67 @@ +//===- InternalizeUsedGlobals.cpp - mark used globals as internal ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The internalize pass does not mark internal globals marked as "used", +// which may be achieved with __attribute((used))__ in C++, for example. +// In PNaCl scenarios, we always perform whole program analysis, and +// the ABI requires all but entrypoint globals to be internal. This pass +// satisfies such requirements. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +namespace { + +class InternalizeUsedGlobals : public ModulePass { +public: + static char ID; + + InternalizeUsedGlobals() : ModulePass(ID) { + initializeInternalizeUsedGlobalsPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnModule(Module &M); +}; +} + +char InternalizeUsedGlobals::ID = 0; + +INITIALIZE_PASS(InternalizeUsedGlobals, "internalize-used-globals", + "Mark internal globals in the llvm.used list", false, false) + +bool InternalizeUsedGlobals::runOnModule(Module &M) { + bool Changed = false; + + SmallPtrSet Used; + collectUsedGlobalVariables(M, Used, /*CompilerUsed =*/false); + for (GlobalValue *V : Used) { + if (V->getLinkage() != GlobalValue::InternalLinkage) { + // Setting Linkage to InternalLinkage also sets the visibility to + // DefaultVisibility. + // For explicitness, we do so upfront. + V->setVisibility(GlobalValue::DefaultVisibility); + V->setLinkage(GlobalValue::InternalLinkage); + Changed = true; + } + } + return Changed; +} + +ModulePass *llvm::createInternalizeUsedGlobalsPass() { + return new InternalizeUsedGlobals(); +} diff --git a/lib/Transforms/NaCl/PNaClABISimplify.cpp b/lib/Transforms/NaCl/PNaClABISimplify.cpp index 06fc512816c..fa236a9b237 100644 --- a/lib/Transforms/NaCl/PNaClABISimplify.cpp +++ b/lib/Transforms/NaCl/PNaClABISimplify.cpp @@ -58,11 +58,7 @@ void llvm::PNaClABISimplifyAddPreOptPasses(PassManagerBase &PM) { PM.add(createLowerEmSetjmpPass()); // XXX EMSCRIPTEN #if 0 // EMSCRIPTEN: we allow arbitrary symbols to be preserved - // Internalize all symbols in the module except the entry point. 
A PNaCl - // pexe is only allowed to export "_start", whereas a PNaCl PSO is only - // allowed to export "__pnacl_pso_root". - const char *SymbolsToPreserve[] = { "_start", "__pnacl_pso_root" }; - PM.add(createInternalizePass(SymbolsToPreserve)); + PM.add(createInternalizeUsedGlobalsPass()); #endif // Expand out computed gotos (indirectbr and blockaddresses) into switches. @@ -202,6 +198,9 @@ void llvm::PNaClABISimplifyAddPostOptPasses(PassManagerBase &PM) { PM.add(createReplacePtrsWithIntsPass()); #endif + // Convert struct reg function params to struct* byval + PM.add(createSimplifyStructRegSignaturesPass()); + // The atomic cmpxchg instruction returns a struct, and is rewritten to an // intrinsic as a post-opt pass, we therefore need to expand struct regs. PM.add(createExpandStructRegsPass()); @@ -221,9 +220,8 @@ void llvm::PNaClABISimplifyAddPostOptPasses(PassManagerBase &PM) { PM.add(createStripDeadPrototypesPass()); #endif - // Eliminate simple dead code that the post-opt passes could have - // created. #if 0 // EMSCRIPTEN: There's no point in running this since we're running DeadCodeElimination right after. + // Eliminate simple dead code that the post-opt passes could have created. 
PM.add(createDeadInstEliminationPass()); #endif PM.add(createDeadCodeEliminationPass()); diff --git a/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp b/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp index d1d2d179365..53f0d1d809e 100644 --- a/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp +++ b/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp @@ -331,6 +331,8 @@ static AttributeSet RemovePointerAttrs(LLVMContext &Context, case Attribute::NoAlias: case Attribute::ReadNone: case Attribute::ReadOnly: + case Attribute::NonNull: + case Attribute::Dereferenceable: break; default: AB.addAttribute(*Attr); diff --git a/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp b/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp index a11dd0f5b09..5b3cb3b270d 100644 --- a/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp +++ b/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp @@ -115,8 +115,10 @@ bool RewriteLLVMIntrinsics::runOnModule(Module &M) { // Remove all @llvm.prefetch intrinsics. ToNothing PrefetchRewriter(M, Intrinsic::prefetch); + ToNothing AssumeRewriter(M, Intrinsic::assume); - return visitUses(FltRoundsRewriter) | visitUses(PrefetchRewriter); + return visitUses(FltRoundsRewriter) | visitUses(PrefetchRewriter) + | visitUses(AssumeRewriter); } bool RewriteLLVMIntrinsics::visitUses(IntrinsicRewriter &Rewriter) { diff --git a/lib/Transforms/NaCl/SimplifyStructRegSignatures.cpp b/lib/Transforms/NaCl/SimplifyStructRegSignatures.cpp new file mode 100644 index 00000000000..48cbbde3867 --- /dev/null +++ b/lib/Transforms/NaCl/SimplifyStructRegSignatures.cpp @@ -0,0 +1,697 @@ +//===- SimplifyStructRegSignatures.cpp - struct regs to struct pointers----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces function signatures exposing struct registers +// to byval pointer-based signatures. 
+// +// There are 2 types of signatures that are thus changed: +// +// @foo(%some_struct %val) -> @foo(%some_struct* byval %val) +// and +// %someStruct @bar() -> void @bar(%someStruct* sret, ) +// +// Such function types may appear in other type declarations, for example: +// +// %a_struct = type { void (%some_struct)*, i32 } +// +// We map such types to corresponding types, mapping the function types +// appropriately: +// +// %a_struct.0 = type { void (%some_struct*)*, i32 } +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/PassInfo.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Support/Debug.h" + +#include +#include + +using namespace llvm; + +namespace { + +static const unsigned int TypicalFuncArity = 8; +static const unsigned int TypicalStructArity = 8; + +class MappingResult { +public: + MappingResult(Type *ATy, bool Chg) { + Ty = ATy; + Changed = Chg; + } + + bool isChanged() { return Changed; } + + Type *operator->() { return Ty; } + + operator Type *() { return Ty; } + +private: + Type *Ty; + bool Changed; +}; + +// Utility class. For any given type, get the associated type that is free of +// struct register arguments. 
+class TypeMapper { +public: + typedef DenseMap StructMap; + Type *getSimpleType(LLVMContext &Ctx, Type *Ty); + +private: + DenseMap MappedTypes; + MappingResult getSimpleArgumentType(LLVMContext &Ctx, Type *Ty, + StructMap &Tentatives); + MappingResult getSimpleAggregateTypeInternal(LLVMContext &Ctx, Type *Ty, + StructMap &Tentatives); + + bool isChangedStruct(LLVMContext &Ctx, StructType *StructTy, + SmallVector &ElemTypes, + StructMap &Tentatives); +}; + +// This is a ModulePass because the pass recreates functions in +// order to change their signatures. +class SimplifyStructRegSignatures : public ModulePass { +public: + static char ID; + + SimplifyStructRegSignatures() : ModulePass(ID) { + initializeSimplifyStructRegSignaturesPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnModule(Module &M); + +private: + TypeMapper Mapper; + DenseSet FunctionsToDelete; + SetVector CallsToPatch; + SetVector InvokesToPatch; + DenseMap FunctionMap; + bool + simplifyFunction(LLVMContext &Ctx, Function *OldFunc, + DenseMap &DISubprogramMap); + void scheduleInstructionsForCleanup(Function *NewFunc); + template + void fixCallSite(LLVMContext &Ctx, TCall *Call, unsigned PreferredAlignment); + void fixFunctionBody(LLVMContext &Ctx, Function *OldFunc, Function *NewFunc); + + template + TCall *fixCallTargetAndArguments(LLVMContext &Ctx, IRBuilder<> &Builder, + TCall *OldCall, Value *NewTarget, + FunctionType *NewType, + BasicBlock::iterator AllocaInsPoint, + Value *ExtraArg = nullptr); + void checkNoUnsupportedInstructions(LLVMContext &Ctx, Function *Fct); +}; +} + +char SimplifyStructRegSignatures::ID = 0; + +INITIALIZE_PASS( + SimplifyStructRegSignatures, "simplify-struct-reg-signatures", + "Simplify function signatures by removing struct register parameters", + false, false) + +// The type is "simple" if it does not recursively reference a +// function type with at least an operand (arg or return) typed as struct +// register. 
+Type *TypeMapper::getSimpleType(LLVMContext &Ctx, Type *Ty) { + auto Found = MappedTypes.find(Ty); + if (Found != MappedTypes.end()) { + return Found->second; + } + + StructMap Tentatives; + auto Ret = getSimpleAggregateTypeInternal(Ctx, Ty, Tentatives); + assert(Tentatives.size() == 0); + + if (!Ty->isStructTy()) { + // Structs are memoized in getSimpleAggregateTypeInternal. + MappedTypes[Ty] = Ret; + } + return Ret; +} + +// Transforms any type that could transitively reference a function pointer +// into a simplified type. +// We enter this function trying to determine the mapping of a type. Because +// of how structs are handled (not interned by llvm - see further comments +// below) we may be working with temporary types - types (pointers, for example) +// transitively referencing "tentative" structs. For that reason, we do not +// memoize anything here, except for structs. The latter is so that we avoid +// unnecessary repeated creation of types (pointers, function types, etc), +// as we try to map a given type. +MappingResult +TypeMapper::getSimpleAggregateTypeInternal(LLVMContext &Ctx, Type *Ty, + StructMap &Tentatives) { + // Leverage the map for types we encounter on the way. + auto Found = MappedTypes.find(Ty); + if (Found != MappedTypes.end()) { + return {Found->second, Found->second != Ty}; + } + + if (auto *OldFnTy = dyn_cast(Ty)) { + Type *OldRetType = OldFnTy->getReturnType(); + Type *NewRetType = OldRetType; + Type *Void = Type::getVoidTy(Ctx); + SmallVector NewArgs; + bool Changed = false; + // Struct register returns become the first parameter of the new FT. 
+ // The new FT has void for the return type + if (OldRetType->isAggregateType()) { + NewRetType = Void; + Changed = true; + NewArgs.push_back(getSimpleArgumentType(Ctx, OldRetType, Tentatives)); + } + for (auto OldParam : OldFnTy->params()) { + auto NewType = getSimpleArgumentType(Ctx, OldParam, Tentatives); + Changed |= NewType.isChanged(); + NewArgs.push_back(NewType); + } + Type *NewFuncType = + FunctionType::get(NewRetType, NewArgs, OldFnTy->isVarArg()); + return {NewFuncType, Changed}; + } + + if (auto PtrTy = dyn_cast(Ty)) { + auto NewTy = getSimpleAggregateTypeInternal( + Ctx, PtrTy->getPointerElementType(), Tentatives); + + return {NewTy->getPointerTo(PtrTy->getAddressSpace()), NewTy.isChanged()}; + } + + if (auto ArrTy = dyn_cast(Ty)) { + auto NewTy = getSimpleAggregateTypeInternal( + Ctx, ArrTy->getArrayElementType(), Tentatives); + return {ArrayType::get(NewTy, ArrTy->getArrayNumElements()), + NewTy.isChanged()}; + } + + if (auto VecTy = dyn_cast(Ty)) { + auto NewTy = getSimpleAggregateTypeInternal( + Ctx, VecTy->getVectorElementType(), Tentatives); + return {VectorType::get(NewTy, VecTy->getVectorNumElements()), + NewTy.isChanged()}; + } + + // LLVM doesn't intern identified structs (the ones with a name). This, + // together with the fact that such structs can be recursive, + // complicates things a bit. We want to make sure that we only change + // "unsimplified" structs (those that somehow reference funcs that + // are not simple). + // We don't want to change "simplified" structs, otherwise converting + // instruction types will become trickier. + if (auto StructTy = dyn_cast(Ty)) { + SmallVector ElemTypes; + if (!StructTy->isLiteral()) { + // Literals - struct without a name - cannot be recursive, so we + // don't need to form tentatives. + auto Found = Tentatives.find(StructTy); + + // Having a tentative means we are in a recursion trying to map this + // particular struct, so arriving back to it is not a change. 
+ // We will determine if this struct is actually + // changed by checking its other fields. + if (Found != Tentatives.end()) { + return {Found->second, false}; + } + // We have never seen this struct, so we start a tentative. + std::string NewName = StructTy->getStructName(); + NewName += ".simplified"; + StructType *Tentative = StructType::create(Ctx, NewName); + Tentatives[StructTy] = Tentative; + + bool Changed = isChangedStruct(Ctx, StructTy, ElemTypes, Tentatives); + + Tentatives.erase(StructTy); + // We can now decide the mapping of the struct. We will register it + // early with MappedTypes, to avoid leaking tentatives unnecessarily. + // We are leaking the created struct here, but there is no way to + // correctly delete it. + if (!Changed) { + return {MappedTypes[StructTy] = StructTy, false}; + } else { + Tentative->setBody(ElemTypes, StructTy->isPacked()); + return {MappedTypes[StructTy] = Tentative, true}; + } + } else { + bool Changed = isChangedStruct(Ctx, StructTy, ElemTypes, Tentatives); + return {MappedTypes[StructTy] = + StructType::get(Ctx, ElemTypes, StructTy->isPacked()), + Changed}; + } + } + + // Anything else stays the same. + return {Ty, false}; +} + +bool TypeMapper::isChangedStruct( + LLVMContext &Ctx, StructType *StructTy, + SmallVector &ElemTypes, StructMap &Tentatives) { + bool Changed = false; + unsigned StructElemCount = StructTy->getStructNumElements(); + for (unsigned I = 0; I < StructElemCount; I++) { + auto NewElem = getSimpleAggregateTypeInternal( + Ctx, StructTy->getStructElementType(I), Tentatives); + ElemTypes.push_back(NewElem); + Changed |= NewElem.isChanged(); + } + return Changed; +} + +// Get the simplified type of a function argument. 
+MappingResult TypeMapper::getSimpleArgumentType(LLVMContext &Ctx, Type *Ty, + StructMap &Tentatives) { + // struct registers become pointers to simple structs + if (Ty->isAggregateType()) { + return MappingResult( + PointerType::get(getSimpleAggregateTypeInternal(Ctx, Ty, Tentatives), + 0), + true); + } + + return getSimpleAggregateTypeInternal(Ctx, Ty, Tentatives); +} + +// Apply 'byval' to func arguments that used to be struct regs. +// Apply 'sret' to the argument corresponding to the return in the old +// signature. +static void ApplyByValAndSRet(Function *OldFunc, Function *NewFunc) { + // When calling addAttribute, the first one refers to the function, so we + // skip past that. + unsigned ArgOffset = 1; + if (OldFunc->getReturnType()->isAggregateType()) { + NewFunc->addAttribute(1, Attribute::AttrKind::StructRet); + ArgOffset++; + } + + auto &NewArgList = NewFunc->getArgumentList(); + auto NewArg = NewArgList.begin(); + for (const Argument &OldArg : OldFunc->getArgumentList()) { + if (OldArg.getType()->isAggregateType()) { + NewFunc->addAttribute(NewArg->getArgNo() + ArgOffset, + Attribute::AttrKind::ByVal); + } + NewArg++; + } +} + +// Update the arg names for a newly created function. +static void UpdateArgNames(Function *OldFunc, Function *NewFunc) { + auto NewArgIter = NewFunc->arg_begin(); + if (OldFunc->getReturnType()->isAggregateType()) { + NewArgIter->setName("retVal"); + NewArgIter++; + } + + for (const Argument &OldArg : OldFunc->args()) { + Argument *NewArg = NewArgIter++; + NewArg->setName(OldArg.getName() + + (OldArg.getType()->isAggregateType() ? ".ptr" : "")); + } +} + +// Replace all uses of an old value with a new one, disregarding the type. We +// correct the types after we wire the new parameters in, in fixFunctionBody. 
+static void BlindReplace(Value *Old, Value *New) { + for (auto UseIter = Old->use_begin(), E = Old->use_end(); E != UseIter;) { + Use &AUse = *(UseIter++); + AUse.set(New); + } +} + +// Adapt the body of a function for the new arguments. +static void ConvertArgumentValue(Value *Old, Value *New, + Instruction *InsPoint) { + if (Old == New) + return; + + if (Old->getType() == New->getType()) { + Old->replaceAllUsesWith(New); + New->takeName(Old); + return; + } + + bool IsAggregateToPtr = + Old->getType()->isAggregateType() && New->getType()->isPointerTy(); + BlindReplace(Old, (IsAggregateToPtr + ? new LoadInst(New, Old->getName() + ".sreg", InsPoint) + : New)); +} + +// Fix returns. Return true if fixes were needed. +static void FixReturn(Function *OldFunc, Function *NewFunc) { + + Argument *FirstNewArg = NewFunc->getArgumentList().begin(); + + for (auto BIter = NewFunc->begin(), LastBlock = NewFunc->end(); + LastBlock != BIter;) { + BasicBlock *BB = BIter++; + for (auto IIter = BB->begin(), LastI = BB->end(); LastI != IIter;) { + Instruction *Instr = IIter++; + if (ReturnInst *Ret = dyn_cast(Instr)) { + auto RetVal = Ret->getReturnValue(); + IRBuilder<> Builder(Ret); + StoreInst *Store = Builder.CreateStore(RetVal, FirstNewArg); + Store->setAlignment(FirstNewArg->getParamAlignment()); + Builder.CreateRetVoid(); + Ret->eraseFromParent(); + } + } + } +} + +// TODO (mtrofin): is this comprehensive? 
+template +void CopyCallAttributesAndMetadata(TCall *Orig, TCall *NewCall) { + NewCall->setCallingConv(Orig->getCallingConv()); + NewCall->setAttributes(NewCall->getAttributes().addAttributes( + Orig->getContext(), AttributeSet::FunctionIndex, + Orig->getAttributes().getFnAttributes())); + NewCall->takeName(Orig); +} + +static InvokeInst *CreateCallFrom(InvokeInst *Orig, Value *Target, + ArrayRef &Args, + IRBuilder<> &Builder) { + auto Ret = Builder.CreateInvoke(Target, Orig->getNormalDest(), + Orig->getUnwindDest(), Args); + CopyCallAttributesAndMetadata(Orig, Ret); + return Ret; +} + +static CallInst *CreateCallFrom(CallInst *Orig, Value *Target, + ArrayRef &Args, IRBuilder<> &Builder) { + + CallInst *Ret = Builder.CreateCall(Target, Args); + Ret->setTailCallKind(Orig->getTailCallKind()); + CopyCallAttributesAndMetadata(Orig, Ret); + return Ret; +} + +// Insert Alloca at a specified location (normally, beginning of function) +// to avoid memory leaks if reason for inserting the Alloca +// (typically a call/invoke) is in a loop. +static AllocaInst *InsertAllocaAtLocation(IRBuilder<> &Builder, + BasicBlock::iterator &AllocaInsPoint, + Type *ValType) { + auto SavedInsPoint = Builder.GetInsertPoint(); + Builder.SetInsertPoint(AllocaInsPoint); + auto *Alloca = Builder.CreateAlloca(ValType); + AllocaInsPoint = Builder.GetInsertPoint(); + Builder.SetInsertPoint(SavedInsPoint); + return Alloca; +} + +// Fix a call site by handing return type changes and/or parameter type and +// attribute changes. 
+template +void SimplifyStructRegSignatures::fixCallSite(LLVMContext &Ctx, TCall *OldCall, + unsigned PreferredAlignment) { + Value *NewTarget = OldCall->getCalledValue(); + + if (Function *CalledFunc = dyn_cast(NewTarget)) { + NewTarget = this->FunctionMap[CalledFunc]; + } + assert(NewTarget); + + auto *NewType = cast( + Mapper.getSimpleType(Ctx, NewTarget->getType())->getPointerElementType()); + + auto *OldRetType = OldCall->getType(); + const bool IsSRet = + !OldCall->getType()->isVoidTy() && NewType->getReturnType()->isVoidTy(); + + IRBuilder<> Builder(OldCall); + auto AllocaInsPoint = + OldCall->getParent()->getParent()->getEntryBlock().getFirstInsertionPt(); + + if (IsSRet) { + auto *Alloca = InsertAllocaAtLocation(Builder, AllocaInsPoint, OldRetType); + + Alloca->takeName(OldCall); + Alloca->setAlignment(PreferredAlignment); + + auto *NewCall = fixCallTargetAndArguments(Ctx, Builder, OldCall, NewTarget, + NewType, AllocaInsPoint, Alloca); + assert(NewCall); + if (auto *Invoke = dyn_cast(OldCall)) + Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt()); + + auto *Load = Builder.CreateLoad(Alloca, Alloca->getName() + ".sreg"); + Load->setAlignment(Alloca->getAlignment()); + OldCall->replaceAllUsesWith(Load); + } else { + auto *NewCall = fixCallTargetAndArguments(Ctx, Builder, OldCall, NewTarget, + NewType, AllocaInsPoint); + OldCall->replaceAllUsesWith(NewCall); + } + + OldCall->eraseFromParent(); +} + +template +TCall *SimplifyStructRegSignatures::fixCallTargetAndArguments( + LLVMContext &Ctx, IRBuilder<> &Builder, TCall *OldCall, Value *NewTarget, + FunctionType *NewType, BasicBlock::iterator AllocaInsPoint, + Value *ExtraArg) { + SmallSetVector ByRefPlaces; + SmallVector NewArgs; + + unsigned argOffset = ExtraArg ? 1 : 0; + if (ExtraArg) + NewArgs.push_back(ExtraArg); + + // Go over the argument list used in the call/invoke, in order to + // correctly deal with varargs scenarios. 
+ unsigned NumActualParams = OldCall->getNumArgOperands(); + unsigned VarargMark = NewType->getNumParams(); + for (unsigned ArgPos = 0; ArgPos < NumActualParams; ArgPos++) { + + Use &OldArgUse = OldCall->getOperandUse(ArgPos); + Value *OldArg = OldArgUse; + Type *OldArgType = OldArg->getType(); + unsigned NewArgPos = OldArgUse.getOperandNo() + argOffset; + Type *NewArgType = NewType->getFunctionParamType(NewArgPos); + + if (OldArgType != NewArgType && OldArgType->isAggregateType()) { + if (NewArgPos >= VarargMark) { + errs() << *OldCall << '\n'; + report_fatal_error("Aggregate register vararg is not supported"); + } + auto *Alloca = + InsertAllocaAtLocation(Builder, AllocaInsPoint, OldArgType); + Alloca->setName(OldArg->getName() + ".ptr"); + + Builder.CreateStore(OldArg, Alloca); + ByRefPlaces.insert(NewArgPos); + NewArgs.push_back(Alloca); + } else { + NewArgs.push_back(OldArg); + } + } + + ArrayRef ArrRef = NewArgs; + TCall *NewCall = CreateCallFrom(OldCall, NewTarget, ArrRef, Builder); + + // Copy the attributes over, and add byref/sret as necessary. 
+ const AttributeSet &OldAttrSet = OldCall->getAttributes(); + const AttributeSet &NewAttrSet = NewCall->getAttributes(); + + for (unsigned I = 0; I < NewCall->getNumArgOperands(); I++) { + NewCall->setAttributes(NewAttrSet.addAttributes( + Ctx, I + argOffset + 1, OldAttrSet.getParamAttributes(I + 1))); + if (ByRefPlaces.count(I)) { + NewCall->addAttribute(I + 1, Attribute::ByVal); + } + } + + if (ExtraArg) { + NewAttrSet.addAttributes(Ctx, 1, OldAttrSet.getRetAttributes()); + NewCall->addAttribute(1, Attribute::StructRet); + } else { + NewCall->setAttributes(NewAttrSet.addAttributes( + Ctx, AttributeSet::ReturnIndex, OldAttrSet.getRetAttributes())); + } + return NewCall; +} + +void SimplifyStructRegSignatures::scheduleInstructionsForCleanup( + Function *NewFunc) { + for (auto &BBIter : NewFunc->getBasicBlockList()) { + for (auto &IIter : BBIter.getInstList()) { + if (CallInst *Call = dyn_cast(&IIter)) { + CallsToPatch.insert(Call); + } else if (InvokeInst *Invoke = dyn_cast(&IIter)) { + InvokesToPatch.insert(Invoke); + } + } + } +} + +// Change function body in the light of type changes. +void SimplifyStructRegSignatures::fixFunctionBody(LLVMContext &Ctx, + Function *OldFunc, + Function *NewFunc) { + if (NewFunc->empty()) + return; + + bool returnWasFixed = OldFunc->getReturnType()->isAggregateType(); + + Instruction *InsPoint = NewFunc->begin()->begin(); + auto NewArgIter = NewFunc->arg_begin(); + // Advance one more if we used to return a struct register. + if (returnWasFixed) + NewArgIter++; + + // Wire new parameters in. + for (auto ArgIter = OldFunc->arg_begin(), E = OldFunc->arg_end(); + E != ArgIter;) { + Argument *OldArg = ArgIter++; + Argument *NewArg = NewArgIter++; + ConvertArgumentValue(OldArg, NewArg, InsPoint); + } + + // Now fix instruction types. We know that each value could only possibly be + // of a simplified type. 
At the end of this, call sites will be invalid, but + // we handle that afterwards, to make sure we have all the functions changed + // first (so that calls have valid targets) + for (auto BBIter = NewFunc->begin(), LBlock = NewFunc->end(); + LBlock != BBIter;) { + auto Block = BBIter++; + for (auto IIter = Block->begin(), LIns = Block->end(); LIns != IIter;) { + auto Instr = IIter++; + Instr->mutateType(Mapper.getSimpleType(Ctx, Instr->getType())); + } + } + if (returnWasFixed) + FixReturn(OldFunc, NewFunc); +} + +// Ensure function is simplified, returning true if the function +// had to be changed. +bool SimplifyStructRegSignatures::simplifyFunction( + LLVMContext &Ctx, Function *OldFunc, + DenseMap &DISubprogramMap) { + auto *OldFT = OldFunc->getFunctionType(); + auto *NewFT = cast(Mapper.getSimpleType(Ctx, OldFT)); + + Function *&AssociatedFctLoc = FunctionMap[OldFunc]; + if (NewFT != OldFT) { + auto *NewFunc = Function::Create(NewFT, OldFunc->getLinkage()); + AssociatedFctLoc = NewFunc; + + NewFunc->copyAttributesFrom(OldFunc); + OldFunc->getParent()->getFunctionList().insert(OldFunc, NewFunc); + NewFunc->takeName(OldFunc); + + UpdateArgNames(OldFunc, NewFunc); + ApplyByValAndSRet(OldFunc, NewFunc); + + NewFunc->getBasicBlockList().splice(NewFunc->begin(), + OldFunc->getBasicBlockList()); + + fixFunctionBody(Ctx, OldFunc, NewFunc); + FunctionsToDelete.insert(OldFunc); + auto Found = DISubprogramMap.find(OldFunc); + if (Found != DISubprogramMap.end()) + Found->second.replaceFunction(NewFunc); + } else { + AssociatedFctLoc = OldFunc; + } + scheduleInstructionsForCleanup(AssociatedFctLoc); + return NewFT != OldFT; +} + +bool SimplifyStructRegSignatures::runOnModule(Module &M) { + bool Changed = false; + + const DataLayout *DL = M.getDataLayout(); + unsigned PreferredAlignment = 0; + if (DL) + PreferredAlignment = DL->getStackAlignment(); + + LLVMContext &Ctx = M.getContext(); + auto DISubprogramMap = makeSubprogramMap(M); + + // Change function signatures and 
fix a changed function body by + // wiring the new arguments. Call sites are unchanged at this point. + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E;) { + Function *Func = Iter++; + checkNoUnsupportedInstructions(Ctx, Func); + Changed |= simplifyFunction(Ctx, Func, DISubprogramMap); + } + + // Fix call sites. + for (auto &CallToFix : CallsToPatch) { + fixCallSite(Ctx, CallToFix, PreferredAlignment); + } + + for (auto &InvokeToFix : InvokesToPatch) { + fixCallSite(Ctx, InvokeToFix, PreferredAlignment); + } + + // Delete leftover functions - the ones with old signatures. + for (auto &ToDelete : FunctionsToDelete) { + ToDelete->eraseFromParent(); + } + + return Changed; +} + +void SimplifyStructRegSignatures::checkNoUnsupportedInstructions( + LLVMContext &Ctx, Function *Fct) { + for (auto &BB : Fct->getBasicBlockList()) + for (auto &Inst : BB.getInstList()) + if (auto *Landing = dyn_cast<LandingPadInst>(&Inst)) { + auto *LType = Landing->getPersonalityFn()->getType(); + if (LType != Mapper.getSimpleType(Ctx, LType)) { + errs() << *Landing << '\n'; + report_fatal_error("Landing pads with aggregate register " + "signatures are not supported."); + } + } else if (auto *Resume = dyn_cast<ResumeInst>(&Inst)) { + auto *RType = Resume->getValue()->getType(); + if (RType != Mapper.getSimpleType(Ctx, RType)) { + errs() << *Resume << '\n'; + report_fatal_error( + "Resumes with aggregate register signatures are not supported."); + } + } +} + +ModulePass *llvm::createSimplifyStructRegSignaturesPass() { + return new SimplifyStructRegSignatures(); +} diff --git a/test/CodeGen/ARM/atomic-16bit-nacl.ll b/test/CodeGen/ARM/atomic-16bit-nacl.ll new file mode 100644 index 00000000000..6c3c603973c --- /dev/null +++ b/test/CodeGen/ARM/atomic-16bit-nacl.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=armv7 < %s | FileCheck %s +; RUN: llc -mtriple=armv7-unknown-nacl < %s | FileCheck %s -check-prefix=NACL + +; Make sure NaCl doesn't generate register-register loads, which are disallowed +; by its sandbox. 
+ +@i16_large = internal global [516 x i8] undef + +define void @test() { + %i = ptrtoint [516 x i8]* @i16_large to i32 + %a = add i32 %i, 512 + %a.asptr = inttoptr i32 %a to i16* + %loaded = load atomic i16* %a.asptr seq_cst, align 2 + ret void +} +; CHECK-LABEL: test: +; CHECK: ldrh r0, [r1, r0] + +; NACL-LABEL: test: +; NACL: movw r0, :lower16:i16_large +; NACL: movt r0, :upper16:i16_large +; NACL: add r0, r0, #512 +; NACL: ldrh r0, [r0] +; NACL: dmb ish diff --git a/test/CodeGen/ARM/bswap16.ll b/test/CodeGen/ARM/bswap16.ll index 70c62d294ee..99a26b3e2db 100644 --- a/test/CodeGen/ARM/bswap16.ll +++ b/test/CodeGen/ARM/bswap16.ll @@ -1,6 +1,9 @@ ; RUN: llc -mtriple=arm-darwin -mattr=v6 < %s | FileCheck %s ; RUN: llc -mtriple=thumb-darwin -mattr=v6 < %s | FileCheck %s +; @LOCALMOD Change to test that NaCl doesn't merge the pattern. +; RUN: llc -mtriple=armv7-unknown-nacl < %s | FileCheck %s -check-prefix=NACL + define void @test1(i16* nocapture %data) { entry: @@ -13,6 +16,12 @@ entry: ; CHECK: ldrh r[[R1:[0-9]+]], [r0] ; CHECK: rev16 r[[R1]], r[[R1]] ; CHECK: strh r[[R1]], [r0] + + ; NACL-LABEL: test1: + ; NACL: ldrh r[[R1:[0-9]+]], [r0] + ; NACL: rev r[[R1]], r[[R1]] + ; NACL: lsr r[[R1]], r[[R1]], #16 + ; NACL: strh r[[R1]], [r0] } @@ -25,6 +34,10 @@ entry: ; CHECK-LABEL: test2: ; CHECK: rev16 r[[R1:[0-9]+]], r1 ; CHECK: strh r[[R1]], [r0] + + ; NACL-LABEL: test2: + ; NACL: rev16 r[[R1:[0-9]+]], r1 + ; NACL: strh r[[R1]], [r0] } @@ -37,6 +50,35 @@ entry: ; CHECK-LABEL: test3: ; CHECK: ldrh r[[R0:[0-9]+]], [r0] ; CHECK: rev16 r[[R0]], r0 + + ; NACL-LABEL: test3: + ; NACL: ldrh r[[R0:[0-9]+]], [r0] + ; NACL: rev r[[R0]], r0 + ; NACL: lsr r[[R0]], r0, #16 +} + +define i16 @test4(i32 %in) { + %1 = add i32 %in, 256 + %2 = inttoptr i32 %1 to i32* + %3 = load i32* %2, align 2 + %4 = trunc i32 %3 to i16 + %5 = tail call i16 @llvm.bswap.i16(i16 %4) + %6 = add i32 %in, 258 + %7 = inttoptr i32 %6 to i16* + store i16 %5, i16* %7, align 2 + ret i16 %5 + + ; CHECK-LABEL: 
test4: + ; CHECK: ldrh r[[R1:[0-9]+]], [r0, r1] + ; CHECK: rev16 r[[R2:[0-9]+]], r[[R1]] + ; CHECK: strh r[[R2]], [r0, r3] + + ; NACL-LABEL: test4: + ; NACL: add r[[R0:[0-9]+]], r0, #256 + ; NACL: ldrh r[[R0]], [r0] + ; NACL: rev r[[R0]], r[[R0]] + ; NACL: lsr r[[R0]], r[[R0]], #16 + ; NACL: strh r[[R0]], [r1] } declare i16 @llvm.bswap.i16(i16) diff --git a/test/CodeGen/ARM/struct_byval.ll b/test/CodeGen/ARM/struct_byval.ll index 130925a0c23..4aceec47fbd 100644 --- a/test/CodeGen/ARM/struct_byval.ll +++ b/test/CodeGen/ARM/struct_byval.ll @@ -1,6 +1,10 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s ; RUN: llc < %s -mtriple=thumbv7-apple-ios6.0 | FileCheck %s -check-prefix=THUMB +; For NaCl (where we always want movt) ensure no const pool loads are generated. +; RUN: llc < %s -mtriple=armv7-unknown-nacl-gnueabi | FileCheck %s -check-prefix=NACL +; NACL-NOT: .LCPI + ; rdar://9877866 %struct.SmallStruct = type { i32, [8 x i32], [37 x i8] } %struct.LargeStruct = type { i32, [1001 x i8], [300 x i32] } diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll index deab1dcc7eb..e4e696073aa 100644 --- a/test/CodeGen/X86/tailcall-64.ll +++ b/test/CodeGen/X86/tailcall-64.ll @@ -1,4 +1,8 @@ ; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=core2 < %s | FileCheck %s +; @LOCALMOD +; TODO: NaCl tailcall "support" is a mess and has no tests. 
This only tests for +; Bug https://code.google.com/p/nativeclient/issues/detail?id=4123 +; RUN: llc -mtriple=x86_64-unknown-nacl -mcpu=core2 < %s | FileCheck %s --check-prefix=NACL declare i64 @testi() @@ -208,6 +212,10 @@ entry: ; ; CHECK-LABEL: rdar12282281 ; CHECK: jmpq *%r11 # TAILCALL + +; For NaCl, ensure we do not tail-call-optimize 6-arg functions +; NACL-LABEL: rdar12282281 +; NACL: naclcall @funcs = external constant [0 x i32 (i8*, ...)*] define i32 @rdar12282281(i32 %n) nounwind uwtable ssp { @@ -223,6 +231,7 @@ define x86_fp80 @fp80_call(x86_fp80 %x) nounwind { entry: ; CHECK-LABEL: fp80_call: ; CHECK: jmp _fp80_callee +; NACL-LABEL: fp80_call %call = tail call x86_fp80 @fp80_callee(x86_fp80 %x) nounwind ret x86_fp80 %call } diff --git a/test/Transforms/NaCl/expand-varargs-emscripten.ll b/test/Transforms/NaCl/expand-varargs-emscripten.ll new file mode 100644 index 00000000000..261ff94b74f --- /dev/null +++ b/test/Transforms/NaCl/expand-varargs-emscripten.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -mtriple=asmjs-unknown-emscripten -expand-varargs -S | FileCheck %s + +target datalayout = "p:32:32:32" + +%va_list = type i8* + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) +declare void @llvm.va_copy(i8*, i8*) + +declare void @emscripten_asm_const_int(...) +declare void @emscripten_asm_const_double(...) +declare void @emscripten_landingpad(...) +declare void @emscripten_resume(...) 
+ +define void @test(i32 %arg) { + call void (...)* @emscripten_asm_const_int(i32 %arg) + call void (...)* @emscripten_asm_const_double(i32 %arg) + call void (...)* @emscripten_landingpad(i32 %arg) + call void (...)* @emscripten_resume(i32 %arg) + ret void +} +; CHECK-LABEL: define void @test( +; CHECK-NEXT: call void (...)* @emscripten_asm_const_int(i32 %arg) +; CHECK-NEXT: call void (...)* @emscripten_asm_const_double(i32 %arg) +; CHECK-NEXT: call void (...)* @emscripten_landingpad(i32 %arg) +; CHECK-NEXT: call void (...)* @emscripten_resume(i32 %arg) +; CHECK-NEXT: ret void diff --git a/test/Transforms/NaCl/globalcleanup.ll b/test/Transforms/NaCl/globalcleanup.ll index 5f976e22da6..3bb469364c9 100644 --- a/test/Transforms/NaCl/globalcleanup.ll +++ b/test/Transforms/NaCl/globalcleanup.ll @@ -1,8 +1,15 @@ ; RUN: opt < %s -nacl-global-cleanup -S | FileCheck %s ; RUN: opt < %s -nacl-global-cleanup -S | FileCheck -check-prefix=GV %s +; RUN: opt < %s -nacl-global-cleanup -mtriple=asmjs-unknown-emscripten -S | FileCheck -check-prefix=EM %s -@llvm.compiler.used = appending global [0 x i8*] zeroinitializer, section "llvm.metadata" -@llvm.used = appending global [0 x i8*] zeroinitializer, section "llvm.metadata" +@a = global i8 42 + +@llvm.compiler.used = appending global [1 x i8*] [i8* @a], section "llvm.metadata" +@llvm.used = appending global [1 x i8*] [i8* @a], section "llvm.metadata" + +; Emscripten preserves llvm.used but not llvm.compiler.used. 
+; EM-NOT: llvm.compiler.used +; EM: llvm.used ; GV-NOT: llvm.used ; GV-NOT: llvm.compiler.used diff --git a/test/Transforms/NaCl/internalize-used-globals.ll b/test/Transforms/NaCl/internalize-used-globals.ll new file mode 100644 index 00000000000..8fd7e329ee3 --- /dev/null +++ b/test/Transforms/NaCl/internalize-used-globals.ll @@ -0,0 +1,34 @@ +; RUN: opt %s -internalize-used-globals -S | FileCheck %s + +target datalayout = "e-p:32:32-i64:64" +target triple = "le32-unknown-nacl" + +@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)], section "llvm.metadata" +; The used list remains unchanged. +; CHECK: @llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)], section "llvm.metadata" + + +define hidden void @foo() #0 { + ret void +} +; Although in the used list, foo becomes internal. +; CHECK-LABEL: define internal void @foo + + +define i32 @_start() { +entry: + ret i32 0 +} +; @_start is left non-internal. +; CHECK-LABEL: define i32 @_start + +define internal void @my_internal() { + ret void +} + +; Internals are left as-is. +; CHECK-LABEL: define internal void @my_internal() + +!llvm.ident = !{!0} +!0 = metadata !{metadata !"clang version 3.5.0 "} + diff --git a/test/Transforms/NaCl/replace-ptrs-with-ints.ll b/test/Transforms/NaCl/replace-ptrs-with-ints.ll index aaba9da0b7d..9d2974f4c7f 100644 --- a/test/Transforms/NaCl/replace-ptrs-with-ints.ll +++ b/test/Transforms/NaCl/replace-ptrs-with-ints.ll @@ -490,6 +490,16 @@ define void @readonly_readnone(i8* readonly readnone) { } ; CHECK-LABEL: define void @readonly_readnone(i32) +define nonnull i8* @nonnull_ptr(i8* nonnull) { + ret i8* undef +} +; CHECK-LABEL: define i32 @nonnull_ptr(i32) + +define dereferenceable(16) i8* @dereferenceable_ptr(i8* dereferenceable(8)) { + ret i8* undef +} +; CHECK-LABEL: define i32 @dereferenceable_ptr(i32) + ; "nounwind" should be preserved. 
define void @nounwind_func_attr() nounwind { ret void diff --git a/test/Transforms/NaCl/rewrite-assume.ll b/test/Transforms/NaCl/rewrite-assume.ll new file mode 100644 index 00000000000..50e5d2bb6ff --- /dev/null +++ b/test/Transforms/NaCl/rewrite-assume.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s -check-prefix=CLEANED +; Test the @llvm.assume part of the RewriteLLVMIntrinsics pass + +declare void @llvm.assume(i1) + +; No declaration or definition of llvm.assume() should remain. +; CLEANED-NOT: @llvm.assume + +define void @call_assume(i1 %val) { +; CHECK: call_assume +; CHECK-NEXT: ret void + call void @llvm.assume(i1 %val) + ret void +} + +; A more complex example with a number of calls in several BBs. +define void @multiple_calls(i1 %val) { +; CHECK: multiple_calls +entryblock: +; CHECK: entryblock +; CHECK-NEXT: br + call void @llvm.assume(i1 %val) + br i1 %val, label %exitblock, label %never +never: +; CHECK: never: +; CHECK-NEXT: br + call void @llvm.assume(i1 %val) + br label %exitblock +exitblock: +; CHECK: exitblock: +; CHECK-NEXT: ret void + call void @llvm.assume(i1 %val) + ret void +} diff --git a/test/Transforms/NaCl/simplify-struct-reg-pad-crash.ll b/test/Transforms/NaCl/simplify-struct-reg-pad-crash.ll new file mode 100644 index 00000000000..2a854135328 --- /dev/null +++ b/test/Transforms/NaCl/simplify-struct-reg-pad-crash.ll @@ -0,0 +1,21 @@ +; RUN: not opt < %s -simplify-struct-reg-signatures -S + +%struct = type { i32, i32 } + +declare i32 @__hypothetical_personality_1(%struct) + +declare void @something_to_invoke() + +; landingpad with struct +define void @landingpad_is_struct() { + invoke void @something_to_invoke() + to label %OK unwind label %Err + +OK: + ret void + +Err: + %exn = landingpad i32 personality i32(%struct)* @__hypothetical_personality_1 + cleanup + resume i32 %exn +} \ No newline at end of file diff --git 
a/test/Transforms/NaCl/simplify-struct-reg-resume-crash.ll b/test/Transforms/NaCl/simplify-struct-reg-resume-crash.ll new file mode 100644 index 00000000000..0f7e519a879 --- /dev/null +++ b/test/Transforms/NaCl/simplify-struct-reg-resume-crash.ll @@ -0,0 +1,20 @@ +; RUN: not opt < %s -simplify-struct-reg-signatures -S + +%struct = type { i8*, void(%struct)* } + +declare i32 @__gxx_personality_v0(...) +declare void @something_to_invoke() + +; landingpad with struct +define void @landingpad_is_struct(%struct %str) { + invoke void @something_to_invoke() + to label %OK unwind label %Err + +OK: + ret void + +Err: + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + resume %struct %str +} \ No newline at end of file diff --git a/test/Transforms/NaCl/simplify-struct-reg-signatures.ll b/test/Transforms/NaCl/simplify-struct-reg-signatures.ll new file mode 100644 index 00000000000..26e7bce0921 --- /dev/null +++ b/test/Transforms/NaCl/simplify-struct-reg-signatures.ll @@ -0,0 +1,265 @@ +; RUN: opt %s -simplify-struct-reg-signatures -S | FileCheck %s + +declare i32 @__gxx_personality_v0(...) 
+ +%struct = type { i32, i32 } + +%rec_struct = type {%rec_struct*} +%rec_problem_struct = type{void (%rec_problem_struct)*} +%rec_pair_1 = type {%rec_pair_2*} +%rec_pair_2 = type {%rec_pair_1*} +%rec_returning = type { %rec_returning (%rec_returning)* } +%direct_def = type { void(%struct)*, %struct } + +; new type declarations: +; CHECK: %struct = type { i32, i32 } +; CHECK-NEXT: %rec_struct = type { %rec_struct* } +; CHECK-NEXT: %rec_problem_struct.simplified = type { void (%rec_problem_struct.simplified*)* } +; CHECK-NEXT: %rec_pair_1 = type { %rec_pair_2* } +; CHECK-NEXT: %rec_pair_2 = type { %rec_pair_1* } +; CHECK-NEXT: %rec_returning.simplified = type { void (%rec_returning.simplified*, %rec_returning.simplified*)* } +; CHECK-NEXT: %direct_def.simplified = type { void (%struct*)*, %struct } + +; externs +declare void @extern_func(%struct) +declare %struct @struct_returning_extern(i32, %struct) + +; verify that parameters are mapped correctly: single param, two, and combo +; with non-struct regs +; CHECK-NOT: declare void @extern_func(%struct) +; CHECK-NOT: declare %struct @struct_returning_extern(i32, %struct) +; CHECK-LABEL: declare void @extern_func(%struct* byval) +; CHECK-LABEL: declare void @struct_returning_extern(%struct* sret, i32, %struct* byval) + +define void @main(%struct* byval %ptr) { + %val = load %struct* %ptr + call void @extern_func(%struct %val) + ret void +} + +define void @two_param_func(%struct %val1, %struct %val2) { + call void @extern_func(%struct %val1) + call void @extern_func(%struct %val2) + ret void +} + +; CHECK-LABEL: define void @two_param_func(%struct* byval %val1.ptr, %struct* byval %val2.ptr) +; CHECK-NOT: define void @two_param_func(%struct %val1, %struct %val2) + +define i32 @another_func(i32 %a, %struct %str, i64 %b) { + call void @two_param_func(%struct %str, %struct %str) + call void @extern_func(%struct %str) + ret i32 0 +} + +; CHECK-LABEL: define i32 @another_func(i32 %a, %struct* byval %str.ptr, i64 %b) +; CHECK: 
call void @two_param_func(%struct* byval %str.sreg.ptr, %struct* byval %str.sreg.ptr1) + +define %struct @returns_struct(i32 %an_int, %struct %val) { + %tmp = call %struct @struct_returning_extern(i32 %an_int, %struct %val) + %tmp2 = invoke %struct @struct_returning_extern(i32 1, %struct %tmp) + to label %Cont unwind label %Cleanup + +Cont: + ret %struct %tmp2 +Cleanup: + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + resume {i8*, i32} %exn +} + +; verify return value and codegen +; CHECK-LABEL: define void @returns_struct(%struct* sret %retVal, i32 %an_int, %struct* byval %val.ptr) +; CHECK-NEXT: %tmp2 = alloca %struct +; CHECK-NEXT: %tmp.sreg.ptr = alloca %struct +; CHECK-NEXT: %tmp = alloca %struct +; CHECK-NEXT: %val.sreg.ptr = alloca %struct +; CHECK-NEXT: %val.sreg = load %struct* %val.ptr +; CHECK-NEXT: store %struct %val.sreg, %struct* %val.sreg.ptr +; CHECK-NEXT: call void @struct_returning_extern(%struct* sret %tmp, i32 %an_int, %struct* byval %val.sreg.ptr) +; CHECK-NEXT: %tmp.sreg = load %struct* %tmp +; CHECK-NEXT: store %struct %tmp.sreg, %struct* %tmp.sreg.ptr +; CHECK-NEXT: invoke void @struct_returning_extern(%struct* sret %tmp2, i32 1, %struct* byval %tmp.sreg.ptr) +; CHECK-NEXT: to label %Cont unwind label %Cleanup +; CHECK-DAG: Cont: +; CHECK-NEXT: %tmp2.sreg = load %struct* %tmp2 +; CHECK-NEXT: store %struct %tmp2.sreg, %struct* %retVal +; CHECK-NEXT: ret void +; CHECK-DAG: Cleanup: +; CHECK-NEXT: %exn = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 +; CHECK-NEXT: cleanup +; CHECK-NEXT: resume { i8*, i32 } %exn + +define i32 @lots_of_call_attrs() { + %tmp.0 = insertvalue %struct undef, i32 1, 0 + %tmp.1 = insertvalue %struct %tmp.0, i32 2, 1 + %ret = tail call zeroext i32 @another_func(i32 1, %struct %tmp.1, i64 2) readonly + ret i32 %ret +} + +; verify attributes are copied +; CHECK_LABEL: @lots_of_call_attrs +; CHECK: %ret = tail call zeroext i32 @another_func(i32 1, %struct* byval 
%tmp.1.ptr, i64 2) #0 +; CHECK-NEXT: ret i32 %ret + +declare void @rec_struct_ok(%rec_struct*) +declare void @rec_struct_mod(%rec_struct) + +; compliant recursive structs are kept as-is +; CHECK-LABEL: declare void @rec_struct_ok(%rec_struct*) +; CHECK-LABEL: declare void @rec_struct_mod(%rec_struct* byval) + +define void @rec_call_sreg(%rec_problem_struct %r) { + %tmp = extractvalue %rec_problem_struct %r, 0 + call void %tmp(%rec_problem_struct %r) + ret void +} + +; non-compliant structs are correctly mapped and calls are changed +; CHECK-LABEL: define void @rec_call_sreg(%rec_problem_struct.simplified* byval %r.ptr) +; CHECK: call void %tmp(%rec_problem_struct.simplified* byval %r.sreg.ptr) + +declare void @pairs(%rec_pair_1) + +define %rec_returning @rec_returning_fun(%rec_returning %str) { + %tmp = extractvalue %rec_returning %str, 0 + %ret = call %rec_returning %tmp(%rec_returning %str) + ret %rec_returning %ret +} + +; pair structs +; CHECK-LABEL: declare void @pairs(%rec_pair_1* byval) +; CHECK-LABEL: define void @rec_returning_fun(%rec_returning.simplified* sret %retVal, %rec_returning.simplified* byval %str.ptr) +; CHECK-NEXT: %ret = alloca %rec_returning.simplified +; CHECK-NEXT: %str.sreg.ptr = alloca %rec_returning.simplified +; CHECK-NEXT: %str.sreg = load %rec_returning.simplified* %str.ptr +; CHECK-NEXT: %tmp = extractvalue %rec_returning.simplified %str.sreg, 0 +; CHECK-NEXT: store %rec_returning.simplified %str.sreg, %rec_returning.simplified* %str.sreg.ptr +; CHECK-NEXT: call void %tmp(%rec_returning.simplified* sret %ret, %rec_returning.simplified* byval %str.sreg.ptr) +; CHECK-NEXT: %ret.sreg = load %rec_returning.simplified* %ret +; CHECK-NEXT: store %rec_returning.simplified %ret.sreg, %rec_returning.simplified* %retVal +; CHECK-NEXT: ret void + +define void @direct_caller(%direct_def %def) { + %func = extractvalue %direct_def %def, 0 + %param = extractvalue %direct_def %def, 1 + call void %func(%struct %param) + ret void +} + +; CHECK-LABEL: 
define void @direct_caller(%direct_def.simplified* byval %def.ptr) +; CHECK-NEXT: %param.ptr = alloca %struct +; CHECK-NEXT: %def.sreg = load %direct_def.simplified* %def.ptr +; CHECK-NEXT: %func = extractvalue %direct_def.simplified %def.sreg, 0 +; CHECK-NEXT: %param = extractvalue %direct_def.simplified %def.sreg, 1 +; CHECK-NEXT: store %struct %param, %struct* %param.ptr +; CHECK-NEXT: call void %func(%struct* byval %param.ptr) +; CHECK-NEXT: ret void + +; vararg functions are converted correctly +declare void @vararg_ok(i32, ...) +; CHECK-LABEL: declare void @vararg_ok(i32, ...) + +define void @vararg_problem(%rec_problem_struct %arg1, ...) { + ; CHECK-LABEL: define void @vararg_problem(%rec_problem_struct.simplified* byval %arg1.ptr, ...) + ret void +} + +%vararg_fp_struct = type { i32, void (i32, ...)* } +declare void @vararg_fp_fct(%vararg_fp_struct %arg) +;CHECK-LABEL: declare void @vararg_fp_fct(%vararg_fp_struct* byval) + +define void @call_vararg(%vararg_fp_struct %param1, ...) { + %fptr = extractvalue %vararg_fp_struct %param1, 1 + call void (i32, ...)* %fptr(i32 0, i32 1) + ret void +} + +; CHECK-LABEL: define void @call_vararg(%vararg_fp_struct* byval %param1.ptr, ...) 
+; CHECK-NEXT: %param1.sreg = load %vararg_fp_struct* %param1.ptr +; CHECK-NEXT: %fptr = extractvalue %vararg_fp_struct %param1.sreg, 1 +; CHECK-NEXT: call void (i32, ...)* %fptr(i32 0, i32 1) +; CHECK-NEXT: ret void + +%vararg_fp_problem_struct = type { void(%vararg_fp_problem_struct)* } +define void @vararg_fp_problem_call(%vararg_fp_problem_struct* byval %param) { + %fct_ptr = getelementptr %vararg_fp_problem_struct* %param, i32 0, i32 0 + %fct = load void(%vararg_fp_problem_struct)** %fct_ptr + %param_for_call = load %vararg_fp_problem_struct* %param + call void %fct(%vararg_fp_problem_struct %param_for_call) + ret void +} + +; CHECK-LABEL: define void @vararg_fp_problem_call(%vararg_fp_problem_struct.simplified* byval %param) +; CHECK-NEXT: %param_for_call.ptr = alloca %vararg_fp_problem_struct.simplified +; CHECK-NEXT: %fct_ptr = getelementptr %vararg_fp_problem_struct.simplified* %param, i32 0, i32 0 +; CHECK-NEXT: %fct = load void (%vararg_fp_problem_struct.simplified*)** %fct_ptr +; CHECK-NEXT: %param_for_call = load %vararg_fp_problem_struct.simplified* %param +; CHECK-NEXT: store %vararg_fp_problem_struct.simplified %param_for_call, %vararg_fp_problem_struct.simplified* %param_for_call.ptr +; CHECK-NEXT: call void %fct(%vararg_fp_problem_struct.simplified* byval %param_for_call.ptr) +; CHECK-NEXT: ret void + +define void @call_with_array([4 x void(%struct)*] %fptrs, %struct %str) { + %fptr = extractvalue [4 x void(%struct)*] %fptrs, 2 + call void %fptr(%struct %str) + ret void +} + +; CHECK-LABEL: define void @call_with_array([4 x void (%struct*)*]* byval %fptrs.ptr, %struct* byval %str.ptr) +; CHECK-NEXT: %str.sreg.ptr = alloca %struct +; CHECK-NEXT: %fptrs.sreg = load [4 x void (%struct*)*]* %fptrs.ptr +; CHECK-NEXT: %str.sreg = load %struct* %str.ptr +; CHECK-NEXT: %fptr = extractvalue [4 x void (%struct*)*] %fptrs.sreg, 2 +; CHECK-NEXT: store %struct %str.sreg, %struct* %str.sreg.ptr +; CHECK-NEXT: call void %fptr(%struct* byval %str.sreg.ptr) +; 
CHECK-NEXT: ret void + +define void @call_with_array_ptr([4 x void(%struct)*]* %fptrs, %struct %str) { + %fptr_ptr = getelementptr [4 x void(%struct)*]* %fptrs, i32 0, i32 2 + %fptr = load void(%struct)** %fptr_ptr + call void %fptr(%struct %str) + ret void +} + +; CHECK-LABEL: define void @call_with_array_ptr([4 x void (%struct*)*]* %fptrs, %struct* byval %str.ptr) +; CHECK-NEXT: %str.sreg.ptr = alloca %struct +; CHECK-NEXT: %str.sreg = load %struct* %str.ptr +; CHECK-NEXT: %fptr_ptr = getelementptr [4 x void (%struct*)*]* %fptrs, i32 0, i32 2 +; CHECK-NEXT: %fptr = load void (%struct*)** %fptr_ptr +; CHECK-NEXT: store %struct %str.sreg, %struct* %str.sreg.ptr +; CHECK-NEXT: call void %fptr(%struct* byval %str.sreg.ptr) +; CHECK-NEXT: ret void + +define void @call_with_vector(<4 x void (%struct)*> %fptrs, %struct %str) { + %fptr = extractelement <4 x void (%struct)*> %fptrs, i32 2 + call void %fptr(%struct %str) + ret void +} + +; CHECK-LABEL: define void @call_with_vector(<4 x void (%struct*)*> %fptrs, %struct* byval %str.ptr) +; CHECK-NEXT: %str.sreg.ptr = alloca %struct +; CHECK-NEXT: %str.sreg = load %struct* %str.ptr +; CHECK-NEXT: %fptr = extractelement <4 x void (%struct*)*> %fptrs, i32 2 +; CHECK-NEXT: store %struct %str.sreg, %struct* %str.sreg.ptr +; CHECK-NEXT: call void %fptr(%struct* byval %str.sreg.ptr) +; CHECK-NEXT: ret void + +define void @call_with_array_vect([4 x <2 x void(%struct)*>] %fptrs, %struct %str) { + %vect = extractvalue [4 x <2 x void(%struct)*>] %fptrs, 2 + %fptr = extractelement <2 x void (%struct)*> %vect, i32 1 + call void %fptr(%struct %str) + ret void +} + +; CHECK-LABEL: define void @call_with_array_vect([4 x <2 x void (%struct*)*>]* byval %fptrs.ptr, %struct* byval %str.ptr) +; CHECK-NEXT: %str.sreg.ptr = alloca %struct +; CHECK-NEXT: %fptrs.sreg = load [4 x <2 x void (%struct*)*>]* %fptrs.ptr +; CHECK-NEXT: %str.sreg = load %struct* %str.ptr +; CHECK-NEXT: %vect = extractvalue [4 x <2 x void (%struct*)*>] %fptrs.sreg, 2 +; 
CHECK-NEXT: %fptr = extractelement <2 x void (%struct*)*> %vect, i32 1 +; CHECK-NEXT: store %struct %str.sreg, %struct* %str.sreg.ptr +; CHECK-NEXT: call void %fptr(%struct* byval %str.sreg.ptr) +; CHECK-NEXT: ret void + +; this is at the end, corresponds to the call marked as readonly +; CHECK: attributes #0 = { readonly } \ No newline at end of file diff --git a/test/Transforms/NaCl/simplify-struct-reg-vararg-crash.ll b/test/Transforms/NaCl/simplify-struct-reg-vararg-crash.ll new file mode 100644 index 00000000000..2b0e59fe833 --- /dev/null +++ b/test/Transforms/NaCl/simplify-struct-reg-vararg-crash.ll @@ -0,0 +1,10 @@ +; RUN: not opt < %s -simplify-struct-reg-signatures -S + +%struct = type { i32, i32 } + +declare void @vararg_fct(...) + +define void @vararg_caller_with_agg(%struct %str) { + call void(...)* @vararg_fct(%struct %str) + ret void +} \ No newline at end of file diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp index 2e7299a4676..93811b90016 100644 --- a/tools/bugpoint/bugpoint.cpp +++ b/tools/bugpoint/bugpoint.cpp @@ -161,6 +161,8 @@ int main(int argc, char **argv) { initializeGlobalCleanupPass(Registry); initializeGlobalizeConstantVectorsPass(Registry); initializeInsertDivideCheckPass(Registry); + initializeInternalizeUsedGlobalsPass(Registry); + initializeSimplifyStructRegSignaturesPass(Registry); initializePNaClABIVerifyFunctionsPass(Registry); initializePNaClABIVerifyModulePass(Registry); initializePNaClSjLjEHPass(Registry); diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index a6a99fab8a2..4d4b8642dfa 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -391,6 +391,8 @@ int main(int argc, char **argv) { initializeGlobalCleanupPass(Registry); initializeGlobalizeConstantVectorsPass(Registry); initializeInsertDivideCheckPass(Registry); + initializeInternalizeUsedGlobalsPass(Registry); + initializeSimplifyStructRegSignaturesPass(Registry); initializePNaClABIVerifyFunctionsPass(Registry); 
initializePNaClABIVerifyModulePass(Registry); initializePNaClSjLjEHPass(Registry); diff --git a/tools/pnacl-bcdis/pnacl-bcdis.cpp b/tools/pnacl-bcdis/pnacl-bcdis.cpp index bb6c8e3b00a..38de6733f1b 100644 --- a/tools/pnacl-bcdis/pnacl-bcdis.cpp +++ b/tools/pnacl-bcdis/pnacl-bcdis.cpp @@ -64,7 +64,8 @@ static bool DisassembleBitcode() { } // Parse the the bitcode file. - return NaClObjDump(ErrOrFile.get().release(), Output, NoRecords, NoAssembly); + return NaClObjDump(ErrOrFile.get()->getMemBufferRef(), Output, NoRecords, + NoAssembly); } } diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp index cacbde66f35..6dc9b6d01e7 100644 --- a/unittests/ADT/TripleTest.cpp +++ b/unittests/ADT/TripleTest.cpp @@ -663,5 +663,9 @@ TEST(TripleTest, getARMCPUForArch) { EXPECT_STREQ("cortex-a8", Triple.getARMCPUForArch()); EXPECT_STREQ("swift", Triple.getARMCPUForArch("armv7s")); } + { + llvm::Triple Triple("arm--nacl"); + EXPECT_STREQ("cortex-a8", Triple.getARMCPUForArch("arm")); + } } }