From b82e6c61c08dcd77a97cf6cb5020898bb49f97a8 Mon Sep 17 00:00:00 2001 From: Derek Schuff <dschuff@chromium.org> Date: Thu, 19 Mar 2015 10:11:38 -0700 Subject: [PATCH 01/16] Use movw/movt instead of constant pool loads to lower byval parameter copies The ARM backend can use a loop to implement copying byval parameters before a call. In non-thumb2 mode it uses a constant pool load to materialize the trip count. Since constpools go into rodata in NaCl instead of text, they can sometimes be placed too far away to reach with a load instruction. Instead, use movw/movt as in thumb mode to materialize the trip count. R=jfb@chromium.org BUG= https://code.google.com/p/nativeclient/issues/detail?id=4115 Review URL: https://codereview.chromium.org/1019483003 --- lib/Target/ARM/ARMISelLowering.cpp | 12 +++++++----- test/CodeGen/ARM/struct_byval.ll | 4 ++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 337a4ad6242..dc22781bfb3 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -7459,16 +7459,18 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // Load an immediate to varEnd. unsigned varEnd = MRI.createVirtualRegister(TRC); - if (IsThumb2) { + if (IsThumb2 || Subtarget->useMovt(*MF)) { unsigned Vtmp = varEnd; if ((LoopSize & 0xFFFF0000) != 0) Vtmp = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp) - .addImm(LoopSize & 0xFFFF)); + AddDefaultPred(BuildMI(BB, dl, + TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16), + Vtmp).addImm(LoopSize & 0xFFFF)); if ((LoopSize & 0xFFFF0000) != 0) - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) - .addReg(Vtmp).addImm(LoopSize >> 16)); + AddDefaultPred(BuildMI(BB, dl, + TII->get(IsThumb2 ? 
ARM::t2MOVTi16 : ARM::MOVTi16), + varEnd).addReg(Vtmp).addImm(LoopSize >> 16)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); diff --git a/test/CodeGen/ARM/struct_byval.ll b/test/CodeGen/ARM/struct_byval.ll index 130925a0c23..4aceec47fbd 100644 --- a/test/CodeGen/ARM/struct_byval.ll +++ b/test/CodeGen/ARM/struct_byval.ll @@ -1,6 +1,10 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s ; RUN: llc < %s -mtriple=thumbv7-apple-ios6.0 | FileCheck %s -check-prefix=THUMB +; For NaCl (where we always want movt) ensure no const pool loads are generated. +; RUN: llc < %s -mtriple=armv7-unknown-nacl-gnueabi | FileCheck %s -check-prefix=NACL +; NACL-NOT: .LCPI + ; rdar://9877866 %struct.SmallStruct = type { i32, [8 x i32], [37 x i8] } %struct.LargeStruct = type { i32, [1001 x i8], [300 x i32] } From 1352ceb5438aa289b24f38dd5dda9900dd8b37f6 Mon Sep 17 00:00:00 2001 From: Mircea Trofin <mtrofin@chromium.org> Date: Thu, 19 Mar 2015 15:41:37 -0700 Subject: [PATCH 02/16] CMake option to install utils, e.g. FileCheck The goal is to replace nacl tests' use of a hand-crafted filecheck tool, with LLVM's FileCheck. To do so, we need to first ensure FileCheck is copied to the install directory. To that end, this CL introduces a build flag, LLVM_INSTALL_UTILS. 
A separate CL enables this switch in toolchain_build_pnacl.py, see https://codereview.chromium.org/1018123002/ BUG=NONE R=dschuff@chromium.org Review URL: https://codereview.chromium.org/1019043002 --- CMakeLists.txt | 3 +++ cmake/modules/AddLLVM.cmake | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 03404de8009..9f00c8933cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,6 +65,9 @@ if (NOT PACKAGE_VERSION) set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}svn") endif() +# @LOCALMOD +option(LLVM_INSTALL_UTILS "Include utility binaries in the 'install' target." OFF) + option(LLVM_INSTALL_TOOLCHAIN_ONLY "Only include toolchain files in the 'install' target." OFF) option(LLVM_USE_FOLDERS "Enable solution folders in Visual Studio. Disable for Express versions." ON) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 2ef73fb8e5d..0fa32c73589 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -500,6 +500,11 @@ endmacro(add_llvm_example name) macro(add_llvm_utility name) add_llvm_executable(${name} ${ARGN}) set_target_properties(${name} PROPERTIES FOLDER "Utils") +# @LOCALMOD-BEGIN + if( LLVM_INSTALL_UTILS ) + install (TARGETS ${name} RUNTIME DESTINATION bin) + endif() +# @LOCALMOD-END endmacro(add_llvm_utility name) From a508ed2cd2f86a4e055aaeb3761d6721f2d388a6 Mon Sep 17 00:00:00 2001 From: JF Bastien <jfb@chromium.org> Date: Fri, 20 Mar 2015 10:46:08 -0700 Subject: [PATCH 03/16] LLVM: add support for asmjs arch and Emscripten OS This only adds support for the arch/OS and doesn't allow anything else in LLVM for now. There's a corresponding clang patch to allow IR generation. 
clang patch: https://codereview.chromium.org/1022123003 R=jvoung@chromium.org, azakai@mozilla.com, sunfish@mozilla.com BUG= https://code.google.com/p/nativeclient/issues/detail?id=4102 TEST= make check-all Review URL: https://codereview.chromium.org/1024073002 --- include/llvm/ADT/Triple.h | 33 +++++++++++++++++++++++++++++++++ lib/Support/Triple.cpp | 10 ++++++++++ 2 files changed, 43 insertions(+) diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index 419b8e9622a..956d0e7416b 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -73,6 +73,7 @@ class Triple { nvptx64, // NVPTX: 64-bit le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten) le64, // le64: generic little-endian 64-bit CPU (PNaCl / Emscripten) + asmjs, // asm.js JavaScript subset @LOCALMOD Emscripten amdil, // AMDIL amdil64, // AMDIL with 64-bit pointers hsail, // AMD HSAIL @@ -133,6 +134,7 @@ class Triple { Haiku, Minix, RTEMS, + Emscripten, // Emscripten JavaScript runtime @LOCALMOD Emscripten NaCl, // Native Client CNK, // BG/P Compute-Node Kernel Bitrig, @@ -378,6 +380,30 @@ class Triple { bool isOSMSVCRT() const { return false; } bool isOSWindows() const { return false; } bool isOSNaCl() const { return true; } + bool isOSEmscripten() const { return false; } + bool isOSLinux() const { return false; } + bool isOSBinFormatELF() const { return true; } + bool isOSBinFormatCOFF() const { return false; } + bool isOSBinFormatMachO() const { return false; } +#elif defined(__EMSCRIPTEN__) + bool isMacOSX() const { return false; } + bool isiOS() const { return false; } + bool isOSDarwin() const { return false; } + bool isOSNetBSD() const { return false; } + bool isOSOpenBSD() const { return false; } + bool isOSFreeBSD() const { return false; } + bool isOSSolaris() const { return false; } + bool isOSBitrig() const { return false; } + bool isWindowsMSVCEnvironment() const { return false; } + bool isKnownWindowsMSVCEnvironment() const { return false; } + bool 
isWindowsItaniumEnvironment() const { return false; } + bool isWindowsCygwinEnvironment() const { return false; } + bool isWindowsGNUEnvironment() const { return false; } + bool isOSCygMing() const { return false; } + bool isOSMSVCRT() const { return false; } + bool isOSWindows() const { return false; } + bool isOSNaCl() const { return false; } + bool isOSEmscripten() const { return true; } bool isOSLinux() const { return false; } bool isOSBinFormatELF() const { return true; } bool isOSBinFormatCOFF() const { return false; } @@ -462,6 +488,13 @@ class Triple { return getOS() == Triple::NaCl; } + // @LOCALMOD-START Emscripten + /// \brief Tests whether the OS is Emscripten. + bool isOSEmscripten() const { + return getOS() == Triple::Emscripten; + } + // @LOCALMOD-END Emscripten + /// \brief Tests whether the OS is Linux. bool isOSLinux() const { return getOS() == Triple::Linux; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 332606a6839..d436b493e85 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -46,6 +46,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case nvptx64: return "nvptx64"; case le32: return "le32"; case le64: return "le64"; + case asmjs: return "asmjs"; // @LOCALMOD Emscripten case amdil: return "amdil"; case amdil64: return "amdil64"; case hsail: return "hsail"; @@ -100,6 +101,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case le32: return "le32"; case le64: return "le64"; + case asmjs: return "asmjs"; // @LOCALMOD Emscripten + case amdil: case amdil64: return "amdil"; @@ -151,6 +154,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case Haiku: return "haiku"; case Minix: return "minix"; case RTEMS: return "rtems"; + case Emscripten: return "emscripten"; // @LOCALMOD Emscripten case NaCl: return "nacl"; case CNK: return "cnk"; case Bitrig: return "bitrig"; @@ -212,6 +216,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("nvptx64", nvptx64) .Case("le32", le32) 
.Case("le64", le64) + .Case("asmjs", asmjs) // @LOCALMOD Emscripten .Case("amdil", amdil) .Case("amdil64", amdil64) .Case("hsail", hsail) @@ -295,6 +300,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("nvptx64", Triple::nvptx64) .Case("le32", Triple::le32) .Case("le64", Triple::le64) + .Case("asmjs", Triple::asmjs) // @LOCALMOD Emscripten .Case("amdil", Triple::amdil) .Case("amdil64", Triple::amdil64) .Case("hsail", Triple::hsail) @@ -339,6 +345,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("haiku", Triple::Haiku) .StartsWith("minix", Triple::Minix) .StartsWith("rtems", Triple::RTEMS) + .StartsWith("emscripten", Triple::Emscripten) // @LOCALMOD Emscripten .StartsWith("nacl", Triple::NaCl) .StartsWith("cnk", Triple::CNK) .StartsWith("bitrig", Triple::Bitrig) @@ -843,6 +850,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::armeb: case llvm::Triple::hexagon: case llvm::Triple::le32: + case llvm::Triple::asmjs: // @LOCALMOD Emscripten case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::nvptx: @@ -911,6 +919,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::hexagon: case Triple::kalimba: case Triple::le32: + case Triple::asmjs: // @LOCALMOD Emscripten case Triple::mips: case Triple::mipsel: case Triple::nvptx: @@ -953,6 +962,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::thumb: case Triple::thumbeb: case Triple::xcore: + case Triple::asmjs: // @LOCALMOD Emscripten T.setArch(UnknownArch); break; From 6fbcecb7791690603fe502d3469cf6b639499e37 Mon Sep 17 00:00:00 2001 From: Mircea Trofin <mtrofin@chromium.org> Date: Mon, 23 Mar 2015 13:46:44 -0700 Subject: [PATCH 04/16] Lower signatures exposing struct registers to byval struct pointers The new phase, NormalizeStructRegSignatures, converts signatures having parameters, or returning struct registers; or using structs that transitively reference such function types. 
The phase relies on a subsequent phase to clean up the redundant alloca/load/store instructions, however, I'm still investigating which such phase should be. BUG= https://code.google.com/p/nativeclient/issues/detail?id=3857 R=dschuff@chromium.org, jfb@chromium.org, mseaborn@chromium.org Review URL: https://codereview.chromium.org/992493002 --- include/llvm/InitializePasses.h | 1 + include/llvm/Transforms/NaCl.h | 1 + lib/Transforms/NaCl/CMakeLists.txt | 1 + lib/Transforms/NaCl/PNaClABISimplify.cpp | 3 + .../NaCl/SimplifyStructRegSignatures.cpp | 697 ++++++++++++++++++ .../NaCl/simplify-struct-reg-pad-crash.ll | 21 + .../NaCl/simplify-struct-reg-resume-crash.ll | 20 + .../NaCl/simplify-struct-reg-signatures.ll | 265 +++++++ .../NaCl/simplify-struct-reg-vararg-crash.ll | 10 + tools/bugpoint/bugpoint.cpp | 1 + tools/opt/opt.cpp | 1 + 11 files changed, 1021 insertions(+) create mode 100644 lib/Transforms/NaCl/SimplifyStructRegSignatures.cpp create mode 100644 test/Transforms/NaCl/simplify-struct-reg-pad-crash.ll create mode 100644 test/Transforms/NaCl/simplify-struct-reg-resume-crash.ll create mode 100644 test/Transforms/NaCl/simplify-struct-reg-signatures.ll create mode 100644 test/Transforms/NaCl/simplify-struct-reg-vararg-crash.ll diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index de44b7c5ec6..288fa206b4c 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -313,6 +313,7 @@ void initializeGlobalCleanupPass(PassRegistry&); void initializeGlobalizeConstantVectorsPass(PassRegistry&); void initializeInsertDivideCheckPass(PassRegistry&); void initializeNaClCcRewritePass(PassRegistry&); +void initializeSimplifyStructRegSignaturesPass(PassRegistry&); void initializePNaClABIVerifyFunctionsPass(PassRegistry&); void initializePNaClABIVerifyModulePass(PassRegistry&); void initializePNaClSjLjEHPass(PassRegistry&); diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h index 
9de6c23fccc..d068d43299a 100644 --- a/include/llvm/Transforms/NaCl.h +++ b/include/llvm/Transforms/NaCl.h @@ -52,6 +52,7 @@ ModulePass *createExpandVarArgsPass(); ModulePass *createFlattenGlobalsPass(); ModulePass *createGlobalCleanupPass(); ModulePass *createGlobalizeConstantVectorsPass(); +ModulePass *createSimplifyStructRegSignaturesPass(); ModulePass *createPNaClSjLjEHPass(); ModulePass *createReplacePtrsWithIntsPass(); ModulePass *createResolveAliasesPass(); diff --git a/lib/Transforms/NaCl/CMakeLists.txt b/lib/Transforms/NaCl/CMakeLists.txt index 6c35c792ece..857b59f27c2 100644 --- a/lib/Transforms/NaCl/CMakeLists.txt +++ b/lib/Transforms/NaCl/CMakeLists.txt @@ -34,6 +34,7 @@ add_llvm_library(LLVMNaClTransforms RewriteLLVMIntrinsics.cpp RewritePNaClLibraryCalls.cpp SimplifyAllocas.cpp + SimplifyStructRegSignatures.cpp StripAttributes.cpp StripMetadata.cpp ) diff --git a/lib/Transforms/NaCl/PNaClABISimplify.cpp b/lib/Transforms/NaCl/PNaClABISimplify.cpp index e5d2fbd4b38..c8dcae1d19d 100644 --- a/lib/Transforms/NaCl/PNaClABISimplify.cpp +++ b/lib/Transforms/NaCl/PNaClABISimplify.cpp @@ -154,6 +154,9 @@ void llvm::PNaClABISimplifyAddPostOptPasses(PassManagerBase &PM) { // ConstantExprs have already been expanded out. PM.add(createReplacePtrsWithIntsPass()); + // Convert struct reg function params to struct* byval + PM.add(createSimplifyStructRegSignaturesPass()); + // The atomic cmpxchg instruction returns a struct, and is rewritten to an // intrinsic as a post-opt pass, we therefore need to expand struct regs. 
PM.add(createExpandStructRegsPass()); diff --git a/lib/Transforms/NaCl/SimplifyStructRegSignatures.cpp b/lib/Transforms/NaCl/SimplifyStructRegSignatures.cpp new file mode 100644 index 00000000000..48cbbde3867 --- /dev/null +++ b/lib/Transforms/NaCl/SimplifyStructRegSignatures.cpp @@ -0,0 +1,697 @@ +//===- SimplifyStructRegSignatures.cpp - struct regs to struct pointers----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces function signatures exposing struct registers +// to byval pointer-based signatures. +// +// There are 2 types of signatures that are thus changed: +// +// @foo(%some_struct %val) -> @foo(%some_struct* byval %val) +// and +// %someStruct @bar(<other_args>) -> void @bar(%someStruct* sret, <other_args>) +// +// Such function types may appear in other type declarations, for example: +// +// %a_struct = type { void (%some_struct)*, i32 } +// +// We map such types to corresponding types, mapping the function types +// appropriately: +// +// %a_struct.0 = type { void (%some_struct*)*, i32 } +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include 
"llvm/Pass.h" +#include "llvm/PassInfo.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Support/Debug.h" + +#include <cassert> +#include <cstddef> + +using namespace llvm; + +namespace { + +static const unsigned int TypicalFuncArity = 8; +static const unsigned int TypicalStructArity = 8; + +class MappingResult { +public: + MappingResult(Type *ATy, bool Chg) { + Ty = ATy; + Changed = Chg; + } + + bool isChanged() { return Changed; } + + Type *operator->() { return Ty; } + + operator Type *() { return Ty; } + +private: + Type *Ty; + bool Changed; +}; + +// Utility class. For any given type, get the associated type that is free of +// struct register arguments. +class TypeMapper { +public: + typedef DenseMap<StructType *, StructType *> StructMap; + Type *getSimpleType(LLVMContext &Ctx, Type *Ty); + +private: + DenseMap<Type *, Type *> MappedTypes; + MappingResult getSimpleArgumentType(LLVMContext &Ctx, Type *Ty, + StructMap &Tentatives); + MappingResult getSimpleAggregateTypeInternal(LLVMContext &Ctx, Type *Ty, + StructMap &Tentatives); + + bool isChangedStruct(LLVMContext &Ctx, StructType *StructTy, + SmallVector<Type *, TypicalStructArity> &ElemTypes, + StructMap &Tentatives); +}; + +// This is a ModulePass because the pass recreates functions in +// order to change their signatures. 
+class SimplifyStructRegSignatures : public ModulePass { +public: + static char ID; + + SimplifyStructRegSignatures() : ModulePass(ID) { + initializeSimplifyStructRegSignaturesPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnModule(Module &M); + +private: + TypeMapper Mapper; + DenseSet<Function *> FunctionsToDelete; + SetVector<CallInst *> CallsToPatch; + SetVector<InvokeInst *> InvokesToPatch; + DenseMap<Function *, Function *> FunctionMap; + bool + simplifyFunction(LLVMContext &Ctx, Function *OldFunc, + DenseMap<const Function *, DISubprogram> &DISubprogramMap); + void scheduleInstructionsForCleanup(Function *NewFunc); + template <class TCall> + void fixCallSite(LLVMContext &Ctx, TCall *Call, unsigned PreferredAlignment); + void fixFunctionBody(LLVMContext &Ctx, Function *OldFunc, Function *NewFunc); + + template <class TCall> + TCall *fixCallTargetAndArguments(LLVMContext &Ctx, IRBuilder<> &Builder, + TCall *OldCall, Value *NewTarget, + FunctionType *NewType, + BasicBlock::iterator AllocaInsPoint, + Value *ExtraArg = nullptr); + void checkNoUnsupportedInstructions(LLVMContext &Ctx, Function *Fct); +}; +} + +char SimplifyStructRegSignatures::ID = 0; + +INITIALIZE_PASS( + SimplifyStructRegSignatures, "simplify-struct-reg-signatures", + "Simplify function signatures by removing struct register parameters", + false, false) + +// The type is "simple" if it does not recursively reference a +// function type with at least an operand (arg or return) typed as struct +// register. +Type *TypeMapper::getSimpleType(LLVMContext &Ctx, Type *Ty) { + auto Found = MappedTypes.find(Ty); + if (Found != MappedTypes.end()) { + return Found->second; + } + + StructMap Tentatives; + auto Ret = getSimpleAggregateTypeInternal(Ctx, Ty, Tentatives); + assert(Tentatives.size() == 0); + + if (!Ty->isStructTy()) { + // Structs are memoized in getSimpleAggregateTypeInternal. 
+ MappedTypes[Ty] = Ret; + } + return Ret; +} + +// Transforms any type that could transitively reference a function pointer +// into a simplified type. +// We enter this function trying to determine the mapping of a type. Because +// of how structs are handled (not interned by llvm - see further comments +// below) we may be working with temporary types - types (pointers, for example) +// transitively referencing "tentative" structs. For that reason, we do not +// memoize anything here, except for structs. The latter is so that we avoid +// unnecessary repeated creation of types (pointers, function types, etc), +// as we try to map a given type. +MappingResult +TypeMapper::getSimpleAggregateTypeInternal(LLVMContext &Ctx, Type *Ty, + StructMap &Tentatives) { + // Leverage the map for types we encounter on the way. + auto Found = MappedTypes.find(Ty); + if (Found != MappedTypes.end()) { + return {Found->second, Found->second != Ty}; + } + + if (auto *OldFnTy = dyn_cast<FunctionType>(Ty)) { + Type *OldRetType = OldFnTy->getReturnType(); + Type *NewRetType = OldRetType; + Type *Void = Type::getVoidTy(Ctx); + SmallVector<Type *, TypicalFuncArity> NewArgs; + bool Changed = false; + // Struct register returns become the first parameter of the new FT. 
+ // The new FT has void for the return type + if (OldRetType->isAggregateType()) { + NewRetType = Void; + Changed = true; + NewArgs.push_back(getSimpleArgumentType(Ctx, OldRetType, Tentatives)); + } + for (auto OldParam : OldFnTy->params()) { + auto NewType = getSimpleArgumentType(Ctx, OldParam, Tentatives); + Changed |= NewType.isChanged(); + NewArgs.push_back(NewType); + } + Type *NewFuncType = + FunctionType::get(NewRetType, NewArgs, OldFnTy->isVarArg()); + return {NewFuncType, Changed}; + } + + if (auto PtrTy = dyn_cast<PointerType>(Ty)) { + auto NewTy = getSimpleAggregateTypeInternal( + Ctx, PtrTy->getPointerElementType(), Tentatives); + + return {NewTy->getPointerTo(PtrTy->getAddressSpace()), NewTy.isChanged()}; + } + + if (auto ArrTy = dyn_cast<ArrayType>(Ty)) { + auto NewTy = getSimpleAggregateTypeInternal( + Ctx, ArrTy->getArrayElementType(), Tentatives); + return {ArrayType::get(NewTy, ArrTy->getArrayNumElements()), + NewTy.isChanged()}; + } + + if (auto VecTy = dyn_cast<VectorType>(Ty)) { + auto NewTy = getSimpleAggregateTypeInternal( + Ctx, VecTy->getVectorElementType(), Tentatives); + return {VectorType::get(NewTy, VecTy->getVectorNumElements()), + NewTy.isChanged()}; + } + + // LLVM doesn't intern identified structs (the ones with a name). This, + // together with the fact that such structs can be recursive, + // complicates things a bit. We want to make sure that we only change + // "unsimplified" structs (those that somehow reference funcs that + // are not simple). + // We don't want to change "simplified" structs, otherwise converting + // instruction types will become trickier. + if (auto StructTy = dyn_cast<StructType>(Ty)) { + SmallVector<Type *, TypicalStructArity> ElemTypes; + if (!StructTy->isLiteral()) { + // Literals - struct without a name - cannot be recursive, so we + // don't need to form tentatives. 
+ auto Found = Tentatives.find(StructTy); + + // Having a tentative means we are in a recursion trying to map this + // particular struct, so arriving back to it is not a change. + // We will determine if this struct is actually + // changed by checking its other fields. + if (Found != Tentatives.end()) { + return {Found->second, false}; + } + // We have never seen this struct, so we start a tentative. + std::string NewName = StructTy->getStructName(); + NewName += ".simplified"; + StructType *Tentative = StructType::create(Ctx, NewName); + Tentatives[StructTy] = Tentative; + + bool Changed = isChangedStruct(Ctx, StructTy, ElemTypes, Tentatives); + + Tentatives.erase(StructTy); + // We can now decide the mapping of the struct. We will register it + // early with MappedTypes, to avoid leaking tentatives unnecessarily. + // We are leaking the created struct here, but there is no way to + // correctly delete it. + if (!Changed) { + return {MappedTypes[StructTy] = StructTy, false}; + } else { + Tentative->setBody(ElemTypes, StructTy->isPacked()); + return {MappedTypes[StructTy] = Tentative, true}; + } + } else { + bool Changed = isChangedStruct(Ctx, StructTy, ElemTypes, Tentatives); + return {MappedTypes[StructTy] = + StructType::get(Ctx, ElemTypes, StructTy->isPacked()), + Changed}; + } + } + + // Anything else stays the same. + return {Ty, false}; +} + +bool TypeMapper::isChangedStruct( + LLVMContext &Ctx, StructType *StructTy, + SmallVector<Type *, TypicalStructArity> &ElemTypes, StructMap &Tentatives) { + bool Changed = false; + unsigned StructElemCount = StructTy->getStructNumElements(); + for (unsigned I = 0; I < StructElemCount; I++) { + auto NewElem = getSimpleAggregateTypeInternal( + Ctx, StructTy->getStructElementType(I), Tentatives); + ElemTypes.push_back(NewElem); + Changed |= NewElem.isChanged(); + } + return Changed; +} + +// Get the simplified type of a function argument. 
+MappingResult TypeMapper::getSimpleArgumentType(LLVMContext &Ctx, Type *Ty, + StructMap &Tentatives) { + // struct registers become pointers to simple structs + if (Ty->isAggregateType()) { + return MappingResult( + PointerType::get(getSimpleAggregateTypeInternal(Ctx, Ty, Tentatives), + 0), + true); + } + + return getSimpleAggregateTypeInternal(Ctx, Ty, Tentatives); +} + +// Apply 'byval' to func arguments that used to be struct regs. +// Apply 'sret' to the argument corresponding to the return in the old +// signature. +static void ApplyByValAndSRet(Function *OldFunc, Function *NewFunc) { + // When calling addAttribute, the first one refers to the function, so we + // skip past that. + unsigned ArgOffset = 1; + if (OldFunc->getReturnType()->isAggregateType()) { + NewFunc->addAttribute(1, Attribute::AttrKind::StructRet); + ArgOffset++; + } + + auto &NewArgList = NewFunc->getArgumentList(); + auto NewArg = NewArgList.begin(); + for (const Argument &OldArg : OldFunc->getArgumentList()) { + if (OldArg.getType()->isAggregateType()) { + NewFunc->addAttribute(NewArg->getArgNo() + ArgOffset, + Attribute::AttrKind::ByVal); + } + NewArg++; + } +} + +// Update the arg names for a newly created function. +static void UpdateArgNames(Function *OldFunc, Function *NewFunc) { + auto NewArgIter = NewFunc->arg_begin(); + if (OldFunc->getReturnType()->isAggregateType()) { + NewArgIter->setName("retVal"); + NewArgIter++; + } + + for (const Argument &OldArg : OldFunc->args()) { + Argument *NewArg = NewArgIter++; + NewArg->setName(OldArg.getName() + + (OldArg.getType()->isAggregateType() ? ".ptr" : "")); + } +} + +// Replace all uses of an old value with a new one, disregarding the type. We +// correct the types after we wire the new parameters in, in fixFunctionBody. 
+static void BlindReplace(Value *Old, Value *New) { + for (auto UseIter = Old->use_begin(), E = Old->use_end(); E != UseIter;) { + Use &AUse = *(UseIter++); + AUse.set(New); + } +} + +// Adapt the body of a function for the new arguments. +static void ConvertArgumentValue(Value *Old, Value *New, + Instruction *InsPoint) { + if (Old == New) + return; + + if (Old->getType() == New->getType()) { + Old->replaceAllUsesWith(New); + New->takeName(Old); + return; + } + + bool IsAggregateToPtr = + Old->getType()->isAggregateType() && New->getType()->isPointerTy(); + BlindReplace(Old, (IsAggregateToPtr + ? new LoadInst(New, Old->getName() + ".sreg", InsPoint) + : New)); +} + +// Fix returns. Return true if fixes were needed. +static void FixReturn(Function *OldFunc, Function *NewFunc) { + + Argument *FirstNewArg = NewFunc->getArgumentList().begin(); + + for (auto BIter = NewFunc->begin(), LastBlock = NewFunc->end(); + LastBlock != BIter;) { + BasicBlock *BB = BIter++; + for (auto IIter = BB->begin(), LastI = BB->end(); LastI != IIter;) { + Instruction *Instr = IIter++; + if (ReturnInst *Ret = dyn_cast<ReturnInst>(Instr)) { + auto RetVal = Ret->getReturnValue(); + IRBuilder<> Builder(Ret); + StoreInst *Store = Builder.CreateStore(RetVal, FirstNewArg); + Store->setAlignment(FirstNewArg->getParamAlignment()); + Builder.CreateRetVoid(); + Ret->eraseFromParent(); + } + } + } +} + +// TODO (mtrofin): is this comprehensive? 
+template <class TCall> +void CopyCallAttributesAndMetadata(TCall *Orig, TCall *NewCall) { + NewCall->setCallingConv(Orig->getCallingConv()); + NewCall->setAttributes(NewCall->getAttributes().addAttributes( + Orig->getContext(), AttributeSet::FunctionIndex, + Orig->getAttributes().getFnAttributes())); + NewCall->takeName(Orig); +} + +static InvokeInst *CreateCallFrom(InvokeInst *Orig, Value *Target, + ArrayRef<Value *> &Args, + IRBuilder<> &Builder) { + auto Ret = Builder.CreateInvoke(Target, Orig->getNormalDest(), + Orig->getUnwindDest(), Args); + CopyCallAttributesAndMetadata(Orig, Ret); + return Ret; +} + +static CallInst *CreateCallFrom(CallInst *Orig, Value *Target, + ArrayRef<Value *> &Args, IRBuilder<> &Builder) { + + CallInst *Ret = Builder.CreateCall(Target, Args); + Ret->setTailCallKind(Orig->getTailCallKind()); + CopyCallAttributesAndMetadata(Orig, Ret); + return Ret; +} + +// Insert Alloca at a specified location (normally, beginning of function) +// to avoid memory leaks if reason for inserting the Alloca +// (typically a call/invoke) is in a loop. +static AllocaInst *InsertAllocaAtLocation(IRBuilder<> &Builder, + BasicBlock::iterator &AllocaInsPoint, + Type *ValType) { + auto SavedInsPoint = Builder.GetInsertPoint(); + Builder.SetInsertPoint(AllocaInsPoint); + auto *Alloca = Builder.CreateAlloca(ValType); + AllocaInsPoint = Builder.GetInsertPoint(); + Builder.SetInsertPoint(SavedInsPoint); + return Alloca; +} + +// Fix a call site by handing return type changes and/or parameter type and +// attribute changes. 
+template <class TCall> +void SimplifyStructRegSignatures::fixCallSite(LLVMContext &Ctx, TCall *OldCall, + unsigned PreferredAlignment) { + Value *NewTarget = OldCall->getCalledValue(); + + if (Function *CalledFunc = dyn_cast<Function>(NewTarget)) { + NewTarget = this->FunctionMap[CalledFunc]; + } + assert(NewTarget); + + auto *NewType = cast<FunctionType>( + Mapper.getSimpleType(Ctx, NewTarget->getType())->getPointerElementType()); + + auto *OldRetType = OldCall->getType(); + const bool IsSRet = + !OldCall->getType()->isVoidTy() && NewType->getReturnType()->isVoidTy(); + + IRBuilder<> Builder(OldCall); + auto AllocaInsPoint = + OldCall->getParent()->getParent()->getEntryBlock().getFirstInsertionPt(); + + if (IsSRet) { + auto *Alloca = InsertAllocaAtLocation(Builder, AllocaInsPoint, OldRetType); + + Alloca->takeName(OldCall); + Alloca->setAlignment(PreferredAlignment); + + auto *NewCall = fixCallTargetAndArguments(Ctx, Builder, OldCall, NewTarget, + NewType, AllocaInsPoint, Alloca); + assert(NewCall); + if (auto *Invoke = dyn_cast<InvokeInst>(OldCall)) + Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt()); + + auto *Load = Builder.CreateLoad(Alloca, Alloca->getName() + ".sreg"); + Load->setAlignment(Alloca->getAlignment()); + OldCall->replaceAllUsesWith(Load); + } else { + auto *NewCall = fixCallTargetAndArguments(Ctx, Builder, OldCall, NewTarget, + NewType, AllocaInsPoint); + OldCall->replaceAllUsesWith(NewCall); + } + + OldCall->eraseFromParent(); +} + +template <class TCall> +TCall *SimplifyStructRegSignatures::fixCallTargetAndArguments( + LLVMContext &Ctx, IRBuilder<> &Builder, TCall *OldCall, Value *NewTarget, + FunctionType *NewType, BasicBlock::iterator AllocaInsPoint, + Value *ExtraArg) { + SmallSetVector<unsigned, TypicalFuncArity> ByRefPlaces; + SmallVector<Value *, TypicalFuncArity> NewArgs; + + unsigned argOffset = ExtraArg ? 
1 : 0; + if (ExtraArg) + NewArgs.push_back(ExtraArg); + + // Go over the argument list used in the call/invoke, in order to + // correctly deal with varargs scenarios. + unsigned NumActualParams = OldCall->getNumArgOperands(); + unsigned VarargMark = NewType->getNumParams(); + for (unsigned ArgPos = 0; ArgPos < NumActualParams; ArgPos++) { + + Use &OldArgUse = OldCall->getOperandUse(ArgPos); + Value *OldArg = OldArgUse; + Type *OldArgType = OldArg->getType(); + unsigned NewArgPos = OldArgUse.getOperandNo() + argOffset; + Type *NewArgType = NewType->getFunctionParamType(NewArgPos); + + if (OldArgType != NewArgType && OldArgType->isAggregateType()) { + if (NewArgPos >= VarargMark) { + errs() << *OldCall << '\n'; + report_fatal_error("Aggregate register vararg is not supported"); + } + auto *Alloca = + InsertAllocaAtLocation(Builder, AllocaInsPoint, OldArgType); + Alloca->setName(OldArg->getName() + ".ptr"); + + Builder.CreateStore(OldArg, Alloca); + ByRefPlaces.insert(NewArgPos); + NewArgs.push_back(Alloca); + } else { + NewArgs.push_back(OldArg); + } + } + + ArrayRef<Value *> ArrRef = NewArgs; + TCall *NewCall = CreateCallFrom(OldCall, NewTarget, ArrRef, Builder); + + // Copy the attributes over, and add byref/sret as necessary. 
+ const AttributeSet &OldAttrSet = OldCall->getAttributes(); + const AttributeSet &NewAttrSet = NewCall->getAttributes(); + + for (unsigned I = 0; I < NewCall->getNumArgOperands(); I++) { + NewCall->setAttributes(NewAttrSet.addAttributes( + Ctx, I + argOffset + 1, OldAttrSet.getParamAttributes(I + 1))); + if (ByRefPlaces.count(I)) { + NewCall->addAttribute(I + 1, Attribute::ByVal); + } + } + + if (ExtraArg) { + NewAttrSet.addAttributes(Ctx, 1, OldAttrSet.getRetAttributes()); + NewCall->addAttribute(1, Attribute::StructRet); + } else { + NewCall->setAttributes(NewAttrSet.addAttributes( + Ctx, AttributeSet::ReturnIndex, OldAttrSet.getRetAttributes())); + } + return NewCall; +} + +void SimplifyStructRegSignatures::scheduleInstructionsForCleanup( + Function *NewFunc) { + for (auto &BBIter : NewFunc->getBasicBlockList()) { + for (auto &IIter : BBIter.getInstList()) { + if (CallInst *Call = dyn_cast<CallInst>(&IIter)) { + CallsToPatch.insert(Call); + } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(&IIter)) { + InvokesToPatch.insert(Invoke); + } + } + } +} + +// Change function body in the light of type changes. +void SimplifyStructRegSignatures::fixFunctionBody(LLVMContext &Ctx, + Function *OldFunc, + Function *NewFunc) { + if (NewFunc->empty()) + return; + + bool returnWasFixed = OldFunc->getReturnType()->isAggregateType(); + + Instruction *InsPoint = NewFunc->begin()->begin(); + auto NewArgIter = NewFunc->arg_begin(); + // Advance one more if we used to return a struct register. + if (returnWasFixed) + NewArgIter++; + + // Wire new parameters in. + for (auto ArgIter = OldFunc->arg_begin(), E = OldFunc->arg_end(); + E != ArgIter;) { + Argument *OldArg = ArgIter++; + Argument *NewArg = NewArgIter++; + ConvertArgumentValue(OldArg, NewArg, InsPoint); + } + + // Now fix instruction types. We know that each value could only possibly be + // of a simplified type. 
At the end of this, call sites will be invalid, but + // we handle that afterwards, to make sure we have all the functions changed + // first (so that calls have valid targets) + for (auto BBIter = NewFunc->begin(), LBlock = NewFunc->end(); + LBlock != BBIter;) { + auto Block = BBIter++; + for (auto IIter = Block->begin(), LIns = Block->end(); LIns != IIter;) { + auto Instr = IIter++; + Instr->mutateType(Mapper.getSimpleType(Ctx, Instr->getType())); + } + } + if (returnWasFixed) + FixReturn(OldFunc, NewFunc); +} + +// Ensure function is simplified, returning true if the function +// had to be changed. +bool SimplifyStructRegSignatures::simplifyFunction( + LLVMContext &Ctx, Function *OldFunc, + DenseMap<const Function *, DISubprogram> &DISubprogramMap) { + auto *OldFT = OldFunc->getFunctionType(); + auto *NewFT = cast<FunctionType>(Mapper.getSimpleType(Ctx, OldFT)); + + Function *&AssociatedFctLoc = FunctionMap[OldFunc]; + if (NewFT != OldFT) { + auto *NewFunc = Function::Create(NewFT, OldFunc->getLinkage()); + AssociatedFctLoc = NewFunc; + + NewFunc->copyAttributesFrom(OldFunc); + OldFunc->getParent()->getFunctionList().insert(OldFunc, NewFunc); + NewFunc->takeName(OldFunc); + + UpdateArgNames(OldFunc, NewFunc); + ApplyByValAndSRet(OldFunc, NewFunc); + + NewFunc->getBasicBlockList().splice(NewFunc->begin(), + OldFunc->getBasicBlockList()); + + fixFunctionBody(Ctx, OldFunc, NewFunc); + FunctionsToDelete.insert(OldFunc); + auto Found = DISubprogramMap.find(OldFunc); + if (Found != DISubprogramMap.end()) + Found->second.replaceFunction(NewFunc); + } else { + AssociatedFctLoc = OldFunc; + } + scheduleInstructionsForCleanup(AssociatedFctLoc); + return NewFT != OldFT; +} + +bool SimplifyStructRegSignatures::runOnModule(Module &M) { + bool Changed = false; + + const DataLayout *DL = M.getDataLayout(); + unsigned PreferredAlignment = 0; + if (DL) + PreferredAlignment = DL->getStackAlignment(); + + LLVMContext &Ctx = M.getContext(); + auto DISubprogramMap = 
makeSubprogramMap(M); + + // Change function signatures and fix a changed function body by + // wiring the new arguments. Call sites are unchanged at this point. + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E;) { + Function *Func = Iter++; + checkNoUnsupportedInstructions(Ctx, Func); + Changed |= simplifyFunction(Ctx, Func, DISubprogramMap); + } + + // Fix call sites. + for (auto &CallToFix : CallsToPatch) { + fixCallSite(Ctx, CallToFix, PreferredAlignment); + } + + for (auto &InvokeToFix : InvokesToPatch) { + fixCallSite(Ctx, InvokeToFix, PreferredAlignment); + } + + // Delete leftover functions - the ones with old signatures. + for (auto &ToDelete : FunctionsToDelete) { + ToDelete->eraseFromParent(); + } + + return Changed; +} + +void SimplifyStructRegSignatures::checkNoUnsupportedInstructions( + LLVMContext &Ctx, Function *Fct) { + for (auto &BB : Fct->getBasicBlockList()) + for (auto &Inst : BB.getInstList()) + if (auto *Landing = dyn_cast<LandingPadInst>(&Inst)) { + auto *LType = Landing->getPersonalityFn()->getType(); + if (LType != Mapper.getSimpleType(Ctx, LType)) { + errs() << *Landing << '\n'; + report_fatal_error("Landing pads with aggregate register " + "signatures are not supported."); + } + } else if (auto *Resume = dyn_cast<ResumeInst>(&Inst)) { + auto *RType = Resume->getValue()->getType(); + if (RType != Mapper.getSimpleType(Ctx, RType)) { + errs() << *Resume << '\n'; + report_fatal_error( + "Resumes with aggregate register signatures are not supported."); + } + } +} + +ModulePass *llvm::createSimplifyStructRegSignaturesPass() { + return new SimplifyStructRegSignatures(); +} diff --git a/test/Transforms/NaCl/simplify-struct-reg-pad-crash.ll b/test/Transforms/NaCl/simplify-struct-reg-pad-crash.ll new file mode 100644 index 00000000000..2a854135328 --- /dev/null +++ b/test/Transforms/NaCl/simplify-struct-reg-pad-crash.ll @@ -0,0 +1,21 @@ +; RUN: not opt < %s -simplify-struct-reg-signatures -S + +%struct = type { i32, i32 } + 
+declare i32 @__hypothetical_personality_1(%struct) + +declare void @something_to_invoke() + +; landingpad with struct +define void @landingpad_is_struct() { + invoke void @something_to_invoke() + to label %OK unwind label %Err + +OK: + ret void + +Err: + %exn = landingpad i32 personality i32(%struct)* @__hypothetical_personality_1 + cleanup + resume i32 %exn +} \ No newline at end of file diff --git a/test/Transforms/NaCl/simplify-struct-reg-resume-crash.ll b/test/Transforms/NaCl/simplify-struct-reg-resume-crash.ll new file mode 100644 index 00000000000..0f7e519a879 --- /dev/null +++ b/test/Transforms/NaCl/simplify-struct-reg-resume-crash.ll @@ -0,0 +1,20 @@ +; RUN: not opt < %s -simplify-struct-reg-signatures -S + +%struct = type { i8*, void(%struct)* } + +declare i32 @__gxx_personality_v0(...) +declare void @something_to_invoke() + +; landingpad with struct +define void @landingpad_is_struct(%struct %str) { + invoke void @something_to_invoke() + to label %OK unwind label %Err + +OK: + ret void + +Err: + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + resume %struct %str +} \ No newline at end of file diff --git a/test/Transforms/NaCl/simplify-struct-reg-signatures.ll b/test/Transforms/NaCl/simplify-struct-reg-signatures.ll new file mode 100644 index 00000000000..26e7bce0921 --- /dev/null +++ b/test/Transforms/NaCl/simplify-struct-reg-signatures.ll @@ -0,0 +1,265 @@ +; RUN: opt %s -simplify-struct-reg-signatures -S | FileCheck %s + +declare i32 @__gxx_personality_v0(...) 
+ +%struct = type { i32, i32 } + +%rec_struct = type {%rec_struct*} +%rec_problem_struct = type{void (%rec_problem_struct)*} +%rec_pair_1 = type {%rec_pair_2*} +%rec_pair_2 = type {%rec_pair_1*} +%rec_returning = type { %rec_returning (%rec_returning)* } +%direct_def = type { void(%struct)*, %struct } + +; new type declarations: +; CHECK: %struct = type { i32, i32 } +; CHECK-NEXT: %rec_struct = type { %rec_struct* } +; CHECK-NEXT: %rec_problem_struct.simplified = type { void (%rec_problem_struct.simplified*)* } +; CHECK-NEXT: %rec_pair_1 = type { %rec_pair_2* } +; CHECK-NEXT: %rec_pair_2 = type { %rec_pair_1* } +; CHECK-NEXT: %rec_returning.simplified = type { void (%rec_returning.simplified*, %rec_returning.simplified*)* } +; CHECK-NEXT: %direct_def.simplified = type { void (%struct*)*, %struct } + +; externs +declare void @extern_func(%struct) +declare %struct @struct_returning_extern(i32, %struct) + +; verify that parameters are mapped correctly: single param, two, and combo +; with non-struct regs +; CHECK-NOT: declare void @extern_func(%struct) +; CHECK-NOT: declare %struct @struct_returning_extern(i32, %struct) +; CHECK-LABEL: declare void @extern_func(%struct* byval) +; CHECK-LABEL: declare void @struct_returning_extern(%struct* sret, i32, %struct* byval) + +define void @main(%struct* byval %ptr) { + %val = load %struct* %ptr + call void @extern_func(%struct %val) + ret void +} + +define void @two_param_func(%struct %val1, %struct %val2) { + call void @extern_func(%struct %val1) + call void @extern_func(%struct %val2) + ret void +} + +; CHECK-LABEL: define void @two_param_func(%struct* byval %val1.ptr, %struct* byval %val2.ptr) +; CHECK-NOT: define void @two_param_func(%struct %val1, %struct %val2) + +define i32 @another_func(i32 %a, %struct %str, i64 %b) { + call void @two_param_func(%struct %str, %struct %str) + call void @extern_func(%struct %str) + ret i32 0 +} + +; CHECK-LABEL: define i32 @another_func(i32 %a, %struct* byval %str.ptr, i64 %b) +; CHECK: 
call void @two_param_func(%struct* byval %str.sreg.ptr, %struct* byval %str.sreg.ptr1) + +define %struct @returns_struct(i32 %an_int, %struct %val) { + %tmp = call %struct @struct_returning_extern(i32 %an_int, %struct %val) + %tmp2 = invoke %struct @struct_returning_extern(i32 1, %struct %tmp) + to label %Cont unwind label %Cleanup + +Cont: + ret %struct %tmp2 +Cleanup: + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + resume {i8*, i32} %exn +} + +; verify return value and codegen +; CHECK-LABEL: define void @returns_struct(%struct* sret %retVal, i32 %an_int, %struct* byval %val.ptr) +; CHECK-NEXT: %tmp2 = alloca %struct +; CHECK-NEXT: %tmp.sreg.ptr = alloca %struct +; CHECK-NEXT: %tmp = alloca %struct +; CHECK-NEXT: %val.sreg.ptr = alloca %struct +; CHECK-NEXT: %val.sreg = load %struct* %val.ptr +; CHECK-NEXT: store %struct %val.sreg, %struct* %val.sreg.ptr +; CHECK-NEXT: call void @struct_returning_extern(%struct* sret %tmp, i32 %an_int, %struct* byval %val.sreg.ptr) +; CHECK-NEXT: %tmp.sreg = load %struct* %tmp +; CHECK-NEXT: store %struct %tmp.sreg, %struct* %tmp.sreg.ptr +; CHECK-NEXT: invoke void @struct_returning_extern(%struct* sret %tmp2, i32 1, %struct* byval %tmp.sreg.ptr) +; CHECK-NEXT: to label %Cont unwind label %Cleanup +; CHECK-DAG: Cont: +; CHECK-NEXT: %tmp2.sreg = load %struct* %tmp2 +; CHECK-NEXT: store %struct %tmp2.sreg, %struct* %retVal +; CHECK-NEXT: ret void +; CHECK-DAG: Cleanup: +; CHECK-NEXT: %exn = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 +; CHECK-NEXT: cleanup +; CHECK-NEXT: resume { i8*, i32 } %exn + +define i32 @lots_of_call_attrs() { + %tmp.0 = insertvalue %struct undef, i32 1, 0 + %tmp.1 = insertvalue %struct %tmp.0, i32 2, 1 + %ret = tail call zeroext i32 @another_func(i32 1, %struct %tmp.1, i64 2) readonly + ret i32 %ret +} + +; verify attributes are copied +; CHECK-LABEL: @lots_of_call_attrs +; CHECK: %ret = tail call zeroext i32 @another_func(i32 1, %struct* byval
%tmp.1.ptr, i64 2) #0 +; CHECK-NEXT: ret i32 %ret + +declare void @rec_struct_ok(%rec_struct*) +declare void @rec_struct_mod(%rec_struct) + +; compliant recursive structs are kept as-is +; CHECK-LABEL: declare void @rec_struct_ok(%rec_struct*) +; CHECK-LABEL: declare void @rec_struct_mod(%rec_struct* byval) + +define void @rec_call_sreg(%rec_problem_struct %r) { + %tmp = extractvalue %rec_problem_struct %r, 0 + call void %tmp(%rec_problem_struct %r) + ret void +} + +; non-compliant structs are correctly mapped and calls are changed +; CHECK-LABEL: define void @rec_call_sreg(%rec_problem_struct.simplified* byval %r.ptr) +; CHECK: call void %tmp(%rec_problem_struct.simplified* byval %r.sreg.ptr) + +declare void @pairs(%rec_pair_1) + +define %rec_returning @rec_returning_fun(%rec_returning %str) { + %tmp = extractvalue %rec_returning %str, 0 + %ret = call %rec_returning %tmp(%rec_returning %str) + ret %rec_returning %ret +} + +; pair structs +; CHECK-LABEL: declare void @pairs(%rec_pair_1* byval) +; CHECK-LABEL: define void @rec_returning_fun(%rec_returning.simplified* sret %retVal, %rec_returning.simplified* byval %str.ptr) +; CHECK-NEXT: %ret = alloca %rec_returning.simplified +; CHECK-NEXT: %str.sreg.ptr = alloca %rec_returning.simplified +; CHECK-NEXT: %str.sreg = load %rec_returning.simplified* %str.ptr +; CHECK-NEXT: %tmp = extractvalue %rec_returning.simplified %str.sreg, 0 +; CHECK-NEXT: store %rec_returning.simplified %str.sreg, %rec_returning.simplified* %str.sreg.ptr +; CHECK-NEXT: call void %tmp(%rec_returning.simplified* sret %ret, %rec_returning.simplified* byval %str.sreg.ptr) +; CHECK-NEXT: %ret.sreg = load %rec_returning.simplified* %ret +; CHECK-NEXT: store %rec_returning.simplified %ret.sreg, %rec_returning.simplified* %retVal +; CHECK-NEXT: ret void + +define void @direct_caller(%direct_def %def) { + %func = extractvalue %direct_def %def, 0 + %param = extractvalue %direct_def %def, 1 + call void %func(%struct %param) + ret void +} + +; CHECK-LABEL: 
define void @direct_caller(%direct_def.simplified* byval %def.ptr) +; CHECK-NEXT: %param.ptr = alloca %struct +; CHECK-NEXT: %def.sreg = load %direct_def.simplified* %def.ptr +; CHECK-NEXT: %func = extractvalue %direct_def.simplified %def.sreg, 0 +; CHECK-NEXT: %param = extractvalue %direct_def.simplified %def.sreg, 1 +; CHECK-NEXT: store %struct %param, %struct* %param.ptr +; CHECK-NEXT: call void %func(%struct* byval %param.ptr) +; CHECK-NEXT: ret void + +; vararg functions are converted correctly +declare void @vararg_ok(i32, ...) +; CHECK-LABEL: declare void @vararg_ok(i32, ...) + +define void @vararg_problem(%rec_problem_struct %arg1, ...) { + ; CHECK-LABEL: define void @vararg_problem(%rec_problem_struct.simplified* byval %arg1.ptr, ...) + ret void +} + +%vararg_fp_struct = type { i32, void (i32, ...)* } +declare void @vararg_fp_fct(%vararg_fp_struct %arg) +;CHECK-LABEL: declare void @vararg_fp_fct(%vararg_fp_struct* byval) + +define void @call_vararg(%vararg_fp_struct %param1, ...) { + %fptr = extractvalue %vararg_fp_struct %param1, 1 + call void (i32, ...)* %fptr(i32 0, i32 1) + ret void +} + +; CHECK-LABEL: define void @call_vararg(%vararg_fp_struct* byval %param1.ptr, ...) 
+; CHECK-NEXT: %param1.sreg = load %vararg_fp_struct* %param1.ptr +; CHECK-NEXT: %fptr = extractvalue %vararg_fp_struct %param1.sreg, 1 +; CHECK-NEXT: call void (i32, ...)* %fptr(i32 0, i32 1) +; CHECK-NEXT: ret void + +%vararg_fp_problem_struct = type { void(%vararg_fp_problem_struct)* } +define void @vararg_fp_problem_call(%vararg_fp_problem_struct* byval %param) { + %fct_ptr = getelementptr %vararg_fp_problem_struct* %param, i32 0, i32 0 + %fct = load void(%vararg_fp_problem_struct)** %fct_ptr + %param_for_call = load %vararg_fp_problem_struct* %param + call void %fct(%vararg_fp_problem_struct %param_for_call) + ret void +} + +; CHECK-LABEL: define void @vararg_fp_problem_call(%vararg_fp_problem_struct.simplified* byval %param) +; CHECK-NEXT: %param_for_call.ptr = alloca %vararg_fp_problem_struct.simplified +; CHECK-NEXT: %fct_ptr = getelementptr %vararg_fp_problem_struct.simplified* %param, i32 0, i32 0 +; CHECK-NEXT: %fct = load void (%vararg_fp_problem_struct.simplified*)** %fct_ptr +; CHECK-NEXT: %param_for_call = load %vararg_fp_problem_struct.simplified* %param +; CHECK-NEXT: store %vararg_fp_problem_struct.simplified %param_for_call, %vararg_fp_problem_struct.simplified* %param_for_call.ptr +; CHECK-NEXT: call void %fct(%vararg_fp_problem_struct.simplified* byval %param_for_call.ptr) +; CHECK-NEXT: ret void + +define void @call_with_array([4 x void(%struct)*] %fptrs, %struct %str) { + %fptr = extractvalue [4 x void(%struct)*] %fptrs, 2 + call void %fptr(%struct %str) + ret void +} + +; CHECK-LABEL: define void @call_with_array([4 x void (%struct*)*]* byval %fptrs.ptr, %struct* byval %str.ptr) +; CHECK-NEXT: %str.sreg.ptr = alloca %struct +; CHECK-NEXT: %fptrs.sreg = load [4 x void (%struct*)*]* %fptrs.ptr +; CHECK-NEXT: %str.sreg = load %struct* %str.ptr +; CHECK-NEXT: %fptr = extractvalue [4 x void (%struct*)*] %fptrs.sreg, 2 +; CHECK-NEXT: store %struct %str.sreg, %struct* %str.sreg.ptr +; CHECK-NEXT: call void %fptr(%struct* byval %str.sreg.ptr) +; 
CHECK-NEXT: ret void + +define void @call_with_array_ptr([4 x void(%struct)*]* %fptrs, %struct %str) { + %fptr_ptr = getelementptr [4 x void(%struct)*]* %fptrs, i32 0, i32 2 + %fptr = load void(%struct)** %fptr_ptr + call void %fptr(%struct %str) + ret void +} + +; CHECK-LABEL: define void @call_with_array_ptr([4 x void (%struct*)*]* %fptrs, %struct* byval %str.ptr) +; CHECK-NEXT: %str.sreg.ptr = alloca %struct +; CHECK-NEXT: %str.sreg = load %struct* %str.ptr +; CHECK-NEXT: %fptr_ptr = getelementptr [4 x void (%struct*)*]* %fptrs, i32 0, i32 2 +; CHECK-NEXT: %fptr = load void (%struct*)** %fptr_ptr +; CHECK-NEXT: store %struct %str.sreg, %struct* %str.sreg.ptr +; CHECK-NEXT: call void %fptr(%struct* byval %str.sreg.ptr) +; CHECK-NEXT: ret void + +define void @call_with_vector(<4 x void (%struct)*> %fptrs, %struct %str) { + %fptr = extractelement <4 x void (%struct)*> %fptrs, i32 2 + call void %fptr(%struct %str) + ret void +} + +; CHECK-LABEL: define void @call_with_vector(<4 x void (%struct*)*> %fptrs, %struct* byval %str.ptr) +; CHECK-NEXT: %str.sreg.ptr = alloca %struct +; CHECK-NEXT: %str.sreg = load %struct* %str.ptr +; CHECK-NEXT: %fptr = extractelement <4 x void (%struct*)*> %fptrs, i32 2 +; CHECK-NEXT: store %struct %str.sreg, %struct* %str.sreg.ptr +; CHECK-NEXT: call void %fptr(%struct* byval %str.sreg.ptr) +; CHECK-NEXT: ret void + +define void @call_with_array_vect([4 x <2 x void(%struct)*>] %fptrs, %struct %str) { + %vect = extractvalue [4 x <2 x void(%struct)*>] %fptrs, 2 + %fptr = extractelement <2 x void (%struct)*> %vect, i32 1 + call void %fptr(%struct %str) + ret void +} + +; CHECK-LABEL: define void @call_with_array_vect([4 x <2 x void (%struct*)*>]* byval %fptrs.ptr, %struct* byval %str.ptr) +; CHECK-NEXT: %str.sreg.ptr = alloca %struct +; CHECK-NEXT: %fptrs.sreg = load [4 x <2 x void (%struct*)*>]* %fptrs.ptr +; CHECK-NEXT: %str.sreg = load %struct* %str.ptr +; CHECK-NEXT: %vect = extractvalue [4 x <2 x void (%struct*)*>] %fptrs.sreg, 2 +; 
CHECK-NEXT: %fptr = extractelement <2 x void (%struct*)*> %vect, i32 1 +; CHECK-NEXT: store %struct %str.sreg, %struct* %str.sreg.ptr +; CHECK-NEXT: call void %fptr(%struct* byval %str.sreg.ptr) +; CHECK-NEXT: ret void + +; this is at the end, corresponds to the call marked as readonly +; CHECK: attributes #0 = { readonly } \ No newline at end of file diff --git a/test/Transforms/NaCl/simplify-struct-reg-vararg-crash.ll b/test/Transforms/NaCl/simplify-struct-reg-vararg-crash.ll new file mode 100644 index 00000000000..2b0e59fe833 --- /dev/null +++ b/test/Transforms/NaCl/simplify-struct-reg-vararg-crash.ll @@ -0,0 +1,10 @@ +; RUN: not opt < %s -simplify-struct-reg-signatures -S + +%struct = type { i32, i32 } + +declare void @vararg_fct(...) + +define void @vararg_caller_with_agg(%struct %str) { + call void(...)* @vararg_fct(%struct %str) + ret void +} \ No newline at end of file diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp index 2e7299a4676..1e395db4da6 100644 --- a/tools/bugpoint/bugpoint.cpp +++ b/tools/bugpoint/bugpoint.cpp @@ -161,6 +161,7 @@ int main(int argc, char **argv) { initializeGlobalCleanupPass(Registry); initializeGlobalizeConstantVectorsPass(Registry); initializeInsertDivideCheckPass(Registry); + initializeSimplifyStructRegSignaturesPass(Registry); initializePNaClABIVerifyFunctionsPass(Registry); initializePNaClABIVerifyModulePass(Registry); initializePNaClSjLjEHPass(Registry); diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index d1efd250179..0746339362e 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -391,6 +391,7 @@ int main(int argc, char **argv) { initializeGlobalCleanupPass(Registry); initializeGlobalizeConstantVectorsPass(Registry); initializeInsertDivideCheckPass(Registry); + initializeSimplifyStructRegSignaturesPass(Registry); initializePNaClABIVerifyFunctionsPass(Registry); initializePNaClABIVerifyModulePass(Registry); initializePNaClSjLjEHPass(Registry); From dd389e94e83339770732490e8d5ede7d139af582 Mon 
Sep 17 00:00:00 2001 From: Mircea Trofin <mtrofin@chromium.org> Date: Mon, 23 Mar 2015 14:16:24 -0700 Subject: [PATCH 05/16] Ensure globals marked as "used" are PNaCl-compliant. Globals marked as "used" (e.g. __attribute__((used))) are correctly ignored by the internalize pass. PNaCl ABI compliance requires all globals be marked internal. In such scenarios - whole, single program - marking all globals as internal would be correct. The new pass does so. 4049: globals marked as "used" & pnacl abi compliance Using IRBuilder patch from issue 992493002 at patchset 140001 (http://crrev.com/992493002#ps140001) BUG= https://code.google.com/p/nativeclient/issues/detail?id=4049 R=jfb@chromium.org Review URL: https://codereview.chromium.org/1015553002 --- include/llvm/InitializePasses.h | 1 + include/llvm/Transforms/NaCl.h | 1 + lib/Transforms/NaCl/CMakeLists.txt | 1 + .../NaCl/InternalizeUsedGlobals.cpp | 67 +++++++++++++++++++ lib/Transforms/NaCl/PNaClABISimplify.cpp | 1 + .../NaCl/internalize-used-globals.ll | 34 ++++++++++ tools/bugpoint/bugpoint.cpp | 1 + tools/opt/opt.cpp | 1 + 8 files changed, 107 insertions(+) create mode 100644 lib/Transforms/NaCl/InternalizeUsedGlobals.cpp create mode 100644 test/Transforms/NaCl/internalize-used-globals.ll diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 288fa206b4c..554aca37157 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -312,6 +312,7 @@ void initializeFlattenGlobalsPass(PassRegistry&); void initializeGlobalCleanupPass(PassRegistry&); void initializeGlobalizeConstantVectorsPass(PassRegistry&); void initializeInsertDivideCheckPass(PassRegistry&); +void initializeInternalizeUsedGlobalsPass(PassRegistry&); void initializeNaClCcRewritePass(PassRegistry&); void initializeSimplifyStructRegSignaturesPass(PassRegistry&); void initializePNaClABIVerifyFunctionsPass(PassRegistry&); diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h index
d068d43299a..5bfad4ab119 100644 --- a/include/llvm/Transforms/NaCl.h +++ b/include/llvm/Transforms/NaCl.h @@ -52,6 +52,7 @@ ModulePass *createExpandVarArgsPass(); ModulePass *createFlattenGlobalsPass(); ModulePass *createGlobalCleanupPass(); ModulePass *createGlobalizeConstantVectorsPass(); +ModulePass *createInternalizeUsedGlobalsPass(); ModulePass *createSimplifyStructRegSignaturesPass(); ModulePass *createPNaClSjLjEHPass(); ModulePass *createReplacePtrsWithIntsPass(); diff --git a/lib/Transforms/NaCl/CMakeLists.txt b/lib/Transforms/NaCl/CMakeLists.txt index 857b59f27c2..a8188a737ef 100644 --- a/lib/Transforms/NaCl/CMakeLists.txt +++ b/lib/Transforms/NaCl/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_library(LLVMNaClTransforms GlobalCleanup.cpp GlobalizeConstantVectors.cpp InsertDivideCheck.cpp + InternalizeUsedGlobals.cpp PNaClABISimplify.cpp PNaClSjLjEH.cpp PromoteI1Ops.cpp diff --git a/lib/Transforms/NaCl/InternalizeUsedGlobals.cpp b/lib/Transforms/NaCl/InternalizeUsedGlobals.cpp new file mode 100644 index 00000000000..fef6fc04be3 --- /dev/null +++ b/lib/Transforms/NaCl/InternalizeUsedGlobals.cpp @@ -0,0 +1,67 @@ +//===- InternalizeUsedGlobals.cpp - mark used globals as internal ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The internalize pass does not mark internal globals marked as "used", +// which may be achieved with __attribute((used))__ in C++, for example. +// In PNaCl scenarios, we always perform whole program analysis, and +// the ABI requires all but entrypoint globals to be internal. This pass +// satisfies such requirements. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +namespace { + +class InternalizeUsedGlobals : public ModulePass { +public: + static char ID; + + InternalizeUsedGlobals() : ModulePass(ID) { + initializeInternalizeUsedGlobalsPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnModule(Module &M); +}; +} + +char InternalizeUsedGlobals::ID = 0; + +INITIALIZE_PASS(InternalizeUsedGlobals, "internalize-used-globals", + "Mark internal globals in the llvm.used list", false, false) + +bool InternalizeUsedGlobals::runOnModule(Module &M) { + bool Changed = false; + + SmallPtrSet<GlobalValue *, 8> Used; + collectUsedGlobalVariables(M, Used, /*CompilerUsed =*/false); + for (GlobalValue *V : Used) { + if (V->getLinkage() != GlobalValue::InternalLinkage) { + // Setting Linkage to InternalLinkage also sets the visibility to + // DefaultVisibility. + // For explicitness, we do so upfront. + V->setVisibility(GlobalValue::DefaultVisibility); + V->setLinkage(GlobalValue::InternalLinkage); + Changed = true; + } + } + return Changed; +} + +ModulePass *llvm::createInternalizeUsedGlobalsPass() { + return new InternalizeUsedGlobals(); +} diff --git a/lib/Transforms/NaCl/PNaClABISimplify.cpp b/lib/Transforms/NaCl/PNaClABISimplify.cpp index c8dcae1d19d..c82575e0389 100644 --- a/lib/Transforms/NaCl/PNaClABISimplify.cpp +++ b/lib/Transforms/NaCl/PNaClABISimplify.cpp @@ -47,6 +47,7 @@ void llvm::PNaClABISimplifyAddPreOptPasses(PassManagerBase &PM) { // allowed to export "__pnacl_pso_root". 
const char *SymbolsToPreserve[] = { "_start", "__pnacl_pso_root" }; PM.add(createInternalizePass(SymbolsToPreserve)); + PM.add(createInternalizeUsedGlobalsPass()); // Expand out computed gotos (indirectbr and blockaddresses) into switches. PM.add(createExpandIndirectBrPass()); diff --git a/test/Transforms/NaCl/internalize-used-globals.ll b/test/Transforms/NaCl/internalize-used-globals.ll new file mode 100644 index 00000000000..8fd7e329ee3 --- /dev/null +++ b/test/Transforms/NaCl/internalize-used-globals.ll @@ -0,0 +1,34 @@ +; RUN: opt %s -internalize-used-globals -S | FileCheck %s + +target datalayout = "e-p:32:32-i64:64" +target triple = "le32-unknown-nacl" + +@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)], section "llvm.metadata" +; The used list remains unchanged. +; CHECK: @llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)], section "llvm.metadata" + + +define hidden void @foo() #0 { + ret void +} +; Although in the used list, foo becomes internal. +; CHECK-LABEL: define internal void @foo + + +define i32 @_start() { +entry: + ret i32 0 +} +; @_start is left non-internal. +; CHECK-LABEL: define i32 @_start + +define internal void @my_internal() { + ret void +} + +; Internals are left as-is. 
+; CHECK-LABEL: define internal void @my_internal() + +!llvm.ident = !{!0} +!0 = metadata !{metadata !"clang version 3.5.0 "} + diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp index 1e395db4da6..93811b90016 100644 --- a/tools/bugpoint/bugpoint.cpp +++ b/tools/bugpoint/bugpoint.cpp @@ -161,6 +161,7 @@ int main(int argc, char **argv) { initializeGlobalCleanupPass(Registry); initializeGlobalizeConstantVectorsPass(Registry); initializeInsertDivideCheckPass(Registry); + initializeInternalizeUsedGlobalsPass(Registry); initializeSimplifyStructRegSignaturesPass(Registry); initializePNaClABIVerifyFunctionsPass(Registry); initializePNaClABIVerifyModulePass(Registry); diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index 0746339362e..5a42de64639 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -391,6 +391,7 @@ int main(int argc, char **argv) { initializeGlobalCleanupPass(Registry); initializeGlobalizeConstantVectorsPass(Registry); initializeInsertDivideCheckPass(Registry); + initializeInternalizeUsedGlobalsPass(Registry); initializeSimplifyStructRegSignaturesPass(Registry); initializePNaClABIVerifyFunctionsPass(Registry); initializePNaClABIVerifyModulePass(Registry); From 989d0a7944c06f153357c566a143eca0e02f9438 Mon Sep 17 00:00:00 2001 From: Karl Schimpf <kschimpf@google.com> Date: Tue, 24 Mar 2015 11:22:52 -0700 Subject: [PATCH 06/16] Clean up use of MemoryBuffer in NaClObjDump and NaClBitcodeMunge. Changes the API to function NaClObjDump to expect a MemoryBufferRef instead of a MemoryBuffer*, to be consistent with other functions in NaClReaderWriter.h Also cleans up class NaClBitcodeMunge.cpp to reuse the low-level implementation of the memory buffer. This was done after discovering that getMemBufferCopy allocates (and leaks) an internal low-level implementation of the memory buffer. The code now reuses a single buffer over all tests for that class, stopping the leak to memory. 
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4103 R=dschuff@chromium.org, jvoung@chromium.org Review URL: https://codereview.chromium.org/1013823002 --- include/llvm/Bitcode/NaCl/NaClBitcodeMunge.h | 2 ++ include/llvm/Bitcode/NaCl/NaClReaderWriter.h | 5 ++-- lib/Bitcode/NaCl/Analysis/NaClObjDump.cpp | 8 +++--- .../NaCl/TestUtils/NaClBitcodeMunge.cpp | 25 ++++++++----------- tools/pnacl-bcdis/pnacl-bcdis.cpp | 3 ++- 5 files changed, 21 insertions(+), 22 deletions(-) diff --git a/include/llvm/Bitcode/NaCl/NaClBitcodeMunge.h b/include/llvm/Bitcode/NaCl/NaClBitcodeMunge.h index 31a5c473363..938b7ced65d 100644 --- a/include/llvm/Bitcode/NaCl/NaClBitcodeMunge.h +++ b/include/llvm/Bitcode/NaCl/NaClBitcodeMunge.h @@ -141,6 +141,8 @@ class NaClBitcodeMunger { raw_string_ostream FatalStream; // The stack of maximum abbreviation indices allowed by block enter record. SmallVector<uint64_t, 3> AbbrevIndexLimitStack; + // The buffer for the contents of the munged input. + SmallVector<char, 1024> MungedInputBuffer; // Records that an error occurred, and returns stream to print error // message to. diff --git a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h index eb3a23f3cc1..6bd4abe0f2a 100644 --- a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h +++ b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h @@ -126,9 +126,8 @@ namespace llvm { /// NaClObjDump - Read PNaCl bitcode file from input, and print a /// textual representation of its contents. NoRecords and NoAssembly - /// define what should not be included in the dump. Note: The caller - /// retains ownership of the Input memory buffer. - bool NaClObjDump(MemoryBuffer *Input, raw_ostream &output, + /// define what should not be included in the dump. 
+ bool NaClObjDump(MemoryBufferRef Input, raw_ostream &output, bool NoRecords, bool NoAssembly); } // end llvm namespace diff --git a/lib/Bitcode/NaCl/Analysis/NaClObjDump.cpp b/lib/Bitcode/NaCl/Analysis/NaClObjDump.cpp index d56ee291ce3..f1606596baf 100644 --- a/lib/Bitcode/NaCl/Analysis/NaClObjDump.cpp +++ b/lib/Bitcode/NaCl/Analysis/NaClObjDump.cpp @@ -3518,19 +3518,19 @@ bool NaClDisTopLevelParser::ParseBlock(unsigned BlockID) { namespace llvm { -bool NaClObjDump(MemoryBuffer *MemBuf, raw_ostream &Output, +bool NaClObjDump(MemoryBufferRef MemBuf, raw_ostream &Output, bool NoRecords, bool NoAssembly) { // Create objects needed to run parser. naclbitc::ObjDumpStream ObjDump(Output, !NoRecords, !NoAssembly); - if (MemBuf->getBufferSize() % 4 != 0) { + if (MemBuf.getBufferSize() % 4 != 0) { ObjDump.Error() << "Bitcode stream should be a multiple of 4 bytes in length.\n"; return true; } - const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart(); - const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize(); + const unsigned char *BufPtr = (const unsigned char *)MemBuf.getBufferStart(); + const unsigned char *EndBufPtr = BufPtr+MemBuf.getBufferSize(); const unsigned char *HeaderPtr = BufPtr; // Read header and verify it is good. diff --git a/lib/Bitcode/NaCl/TestUtils/NaClBitcodeMunge.cpp b/lib/Bitcode/NaCl/TestUtils/NaClBitcodeMunge.cpp index 029578234a3..272634654b8 100644 --- a/lib/Bitcode/NaCl/TestUtils/NaClBitcodeMunge.cpp +++ b/lib/Bitcode/NaCl/TestUtils/NaClBitcodeMunge.cpp @@ -41,9 +41,8 @@ void NaClBitcodeMunger::setupTest( DumpResults.clear(); // Throw away any previous results. 
std::string DumpBuffer; DumpStream = new raw_string_ostream(DumpResults); - SmallVector<char, 0> StreamBuffer; - StreamBuffer.reserve(256*1024); - NaClBitstreamWriter OutStream(StreamBuffer); + MungedInputBuffer.clear(); + NaClBitstreamWriter OutStream(MungedInputBuffer); Writer = &OutStream; if (DebugEmitRecord) { @@ -54,14 +53,13 @@ SetBID = -1; writeMungedData(Munges, MungesSize, AddHeader); - std::string OutBuffer; - raw_string_ostream BitcodeStrm(OutBuffer); - for (SmallVectorImpl<char>::const_iterator - Iter = StreamBuffer.begin(), IterEnd = StreamBuffer.end(); - Iter != IterEnd; ++Iter) { - BitcodeStrm << *Iter; - } - MungedInput = MemoryBuffer::getMemBufferCopy(BitcodeStrm.str(), TestName); + // Add null terminator, so that we meet the requirements of the + // MemoryBuffer API. + MungedInputBuffer.push_back('\0'); + + MungedInput = MemoryBuffer::getMemBuffer( + StringRef(MungedInputBuffer.data(), MungedInputBuffer.size()-1), + TestName); } void NaClBitcodeMunger::cleanupTest() { @@ -401,9 +399,8 @@ bool NaClObjDumpMunger::runTestWithFlags( /// error stream (rather than buffering in DumpStream), so that /// output can be seen in gtest death test. raw_ostream &Output = RunAsDeathTest ? errs() : *DumpStream; - // TODO(jvoung,kschimpf): Should NaClObjDump take a MemoryBufferRef - // like the parser? - if (NaClObjDump(MungedInput.get(), Output, NoRecords, NoAssembly)) + if (NaClObjDump(MungedInput.get()->getMemBufferRef(), + Output, NoRecords, NoAssembly)) FoundErrors = true; cleanupTest(); return !FoundErrors; diff --git a/tools/pnacl-bcdis/pnacl-bcdis.cpp b/tools/pnacl-bcdis/pnacl-bcdis.cpp index bb6c8e3b00a..38de6733f1b 100644 --- a/tools/pnacl-bcdis/pnacl-bcdis.cpp +++ b/tools/pnacl-bcdis/pnacl-bcdis.cpp @@ -64,7 +64,8 @@ static bool DisassembleBitcode() { } // Parse the bitcode file. 
- return NaClObjDump(ErrOrFile.get().release(), Output, NoRecords, NoAssembly); + return NaClObjDump(ErrOrFile.get()->getMemBufferRef(), Output, NoRecords, + NoAssembly); } } From 9828f40b956936a34ef58c8d89ff6f352e6dcba0 Mon Sep 17 00:00:00 2001 From: Karl Schimpf <kschimpf@google.com> Date: Wed, 25 Mar 2015 14:07:29 -0700 Subject: [PATCH 07/16] Removes unused abbreviations from compressed code. The previous version of pnacl-bccompress never removed abbreviations as new abbreviations were added. This resulted in many unused abbreviations being left in the compressed file. This has two costs. The first is that the unnecessary abbreviation is saved to the file. The other is that it may cause the bitcode to use more bits for each abbreviation, increasing the size of each record. This CL removes such abbreviations by adding another bitcode scan to find the best abbreviation to use for each record, and then remove unused abbreviations before writing out the compressed bitcode file. This change (for large bitcode files) results in pnacl-compress running about a 10% slower and produces a bitcode file that is about 1.2% smaller. BUG=None R=jvoung@chromium.org Review URL: https://codereview.chromium.org/1003303002 --- lib/Bitcode/NaCl/Analysis/NaClCompress.cpp | 608 ++++++++++++++------- 1 file changed, 424 insertions(+), 184 deletions(-) diff --git a/lib/Bitcode/NaCl/Analysis/NaClCompress.cpp b/lib/Bitcode/NaCl/Analysis/NaClCompress.cpp index 18465eed435..d4c327f6b18 100644 --- a/lib/Bitcode/NaCl/Analysis/NaClCompress.cpp +++ b/lib/Bitcode/NaCl/Analysis/NaClCompress.cpp @@ -42,6 +42,7 @@ #include "llvm/Bitcode/NaCl/NaClCompress.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Bitcode/NaCl/AbbrevTrieNode.h" #include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" #include "llvm/Bitcode/NaCl/NaClBitstreamWriter.h" @@ -60,8 +61,8 @@ static bool Error(const std::string &Err) { } // Prints out the abbreviation in readable form to the given Stream. 
-static void PrintAbbrev(raw_ostream &Stream, - unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { +static void printAbbrev(raw_ostream &Stream, unsigned BlockID, + const NaClBitCodeAbbrev *Abbrev) { Stream << "Abbrev(block " << BlockID << "): "; Abbrev->Print(Stream); } @@ -74,37 +75,36 @@ typedef std::map<unsigned, unsigned> BitstreamToInternalAbbrevMapType; /// corresponding internal abbreviation indices. class AbbrevBitstreamToInternalMap { public: - AbbrevBitstreamToInternalMap() : NextBitstreamAbbrevIndex(0) {} /// Returns the bitstream abbreviation index that will be associated /// with the next internal abbreviation index. - unsigned GetNextBitstreamAbbrevIndex() { + unsigned getNextBitstreamAbbrevIndex() { return NextBitstreamAbbrevIndex; } /// Changes the next bitstream abbreviation index to the given value. - void SetNextBitstreamAbbrevIndex(unsigned NextIndex) { + void setNextBitstreamAbbrevIndex(unsigned NextIndex) { NextBitstreamAbbrevIndex = NextIndex; } /// Returns true if there is an internal abbreviation index for the /// given bitstream abbreviation. - bool DefinesBitstreamAbbrevIndex(unsigned Index) { + bool definesBitstreamAbbrevIndex(unsigned Index) { return BitstreamToInternalAbbrevMap.find(Index) != BitstreamToInternalAbbrevMap.end(); } /// Returns the internal abbreviation index for the given bitstream /// abbreviation index. - unsigned GetInternalAbbrevIndex(unsigned Index) { + unsigned getInternalAbbrevIndex(unsigned Index) { return BitstreamToInternalAbbrevMap.at(Index); } /// Installs the given internal abbreviation index using the next /// available bitstream abbreviation index. - void InstallNewBitstreamAbbrevIndex(unsigned InternalIndex) { + void installNewBitstreamAbbrevIndex(unsigned InternalIndex) { BitstreamToInternalAbbrevMap[NextBitstreamAbbrevIndex++] = InternalIndex; } @@ -136,7 +136,7 @@ class BlockAbbrevs { Abbrevs.push_back(Abbrev); } GlobalAbbrevBitstreamToInternalMap. 
- SetNextBitstreamAbbrevIndex(Abbrevs.size()); + setNextBitstreamAbbrevIndex(Abbrevs.size()); } ~BlockAbbrevs() { @@ -158,9 +158,9 @@ class BlockAbbrevs { // Returns the index to the corresponding application abbreviation, // if it exists. Otherwise return NO_SUCH_ABBREVIATION; - int FindAbbreviation(const NaClBitCodeAbbrev *Abbrev) const { - for (unsigned i = GetFirstApplicationAbbreviation(); - i < GetNumberAbbreviations(); ++i) { + int findAbbreviation(const NaClBitCodeAbbrev *Abbrev) const { + for (unsigned i = getFirstApplicationAbbreviation(); + i < getNumberAbbreviations(); ++i) { if (*Abbrevs[i] == *Abbrev) return i; } return NO_SUCH_ABBREVIATION; } @@ -172,8 +172,8 @@ class BlockAbbrevs { /// the given abbreviation. Returns true if new abbreviation. /// Updates Index to the index where the abbreviation is located /// in the set of abbreviations. - bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev, int &Index) { - Index = FindAbbreviation(Abbrev); + bool addAbbreviation(NaClBitCodeAbbrev *Abbrev, int &Index) { + Index = findAbbreviation(Abbrev); if (Index != NO_SUCH_ABBREVIATION) { // Already defined, don't install. Abbrev->dropRef(); @@ -190,24 +190,24 @@ class BlockAbbrevs { /// defined for the block. Guarantees that duplicate abbreviations /// are not added to the block. Note: Code takes ownership of /// the given abbreviation. Returns true if new abbreviation. - bool AddAbbreviation(NaClBitCodeAbbrev *Abbrev) { + bool addAbbreviation(NaClBitCodeAbbrev *Abbrev) { int Index; - return AddAbbreviation(Abbrev, Index); + return addAbbreviation(Abbrev, Index); } /// The block ID associated with the block. - unsigned GetBlockID() const { + unsigned getBlockID() const { return BlockID; } /// Returns the index of the first application abbreviation for the /// block. 
- unsigned GetFirstApplicationAbbreviation() const { + unsigned getFirstApplicationAbbreviation() const { return naclbitc::FIRST_APPLICATION_ABBREV; } // Returns the number of abbreviations associated with the block. - unsigned GetNumberAbbreviations() const { + unsigned getNumberAbbreviations() const { return Abbrevs.size(); } @@ -218,38 +218,38 @@ class BlockAbbrevs { /// Returns the abbreviation associated with the given abbreviation /// index. - NaClBitCodeAbbrev *GetIndexedAbbrev(unsigned index) { + NaClBitCodeAbbrev *getIndexedAbbrev(unsigned index) { if (index >= Abbrevs.size()) return 0; return Abbrevs[index]; } // Builds the corresponding fast lookup map for finding abbreviations // that applies to abbreviations in the block - void BuildAbbrevLookupSizeMap( + void buildAbbrevLookupSizeMap( const NaClBitcodeCompressor::CompressFlags &Flags) { - NaClBuildAbbrevLookupMap(GetLookupMap(), - GetAbbrevs(), - GetFirstApplicationAbbreviation()); - if (Flags.ShowAbbrevLookupTries) PrintLookupMap(errs()); + NaClBuildAbbrevLookupMap(getLookupMap(), + getAbbrevs(), + getFirstApplicationAbbreviation()); + if (Flags.ShowAbbrevLookupTries) printLookupMap(errs()); } - AbbrevBitstreamToInternalMap &GetGlobalAbbrevBitstreamToInternalMap() { + AbbrevBitstreamToInternalMap &getGlobalAbbrevBitstreamToInternalMap() { return GlobalAbbrevBitstreamToInternalMap; } - AbbrevLookupSizeMap &GetLookupMap() { + AbbrevLookupSizeMap &getLookupMap() { return LookupMap; } // Returns lower level vector of abbreviations. - const AbbrevVector &GetAbbrevs() const { + const AbbrevVector &getAbbrevs() const { return Abbrevs; } // Returns the abbreviation (index) to use for the corresponding // record, based on the abbreviations of this block. Note: Assumes - // that BuildAbbrevLookupSizeMap has already been called. - unsigned GetRecordAbbrevIndex(const NaClBitcodeRecordData &Record) { + // that buildAbbrevLookupSizeMap has already been called. 
+ unsigned getRecordAbbrevIndex(const NaClBitcodeRecordData &Record) { unsigned BestIndex = 0; // Ignored unless found candidate. unsigned BestScore = 0; // Number of bits associated with BestIndex. bool FoundCandidate = false; @@ -269,7 +269,7 @@ class BlockAbbrevs { IterEnd = Abbreviations.end(); Iter != IterEnd; ++Iter) { uint64_t NumBits = 0; - if (CanUseAbbreviation(Values, Iter->second, NumBits)) { + if (canUseAbbreviation(Values, Iter->second, NumBits)) { if (!FoundCandidate || NumBits < BestScore) { // Use this as candidate. BestIndex = Iter->first; @@ -281,7 +281,7 @@ class BlockAbbrevs { } } } - if (FoundCandidate && BestScore <= UnabbreviatedSize(Record)) { + if (FoundCandidate && BestScore <= unabbreviatedSize(Record)) { return BestIndex; } return naclbitc::UNABBREV_RECORD; @@ -289,12 +289,12 @@ class BlockAbbrevs { // Computes the number of bits that will be generated by the // corresponding read record, if no abbreviation is used. - static uint64_t UnabbreviatedSize(const NaClBitcodeRecordData &Record) { - uint64_t NumBits = MatchVBRBits(Record.Code, DefaultVBRBits); + static uint64_t unabbreviatedSize(const NaClBitcodeRecordData &Record) { + uint64_t NumBits = matchVBRBits(Record.Code, DefaultVBRBits); size_t NumValues = Record.Values.size(); - NumBits += MatchVBRBits(NumValues, DefaultVBRBits); + NumBits += matchVBRBits(NumValues, DefaultVBRBits); for (size_t Index = 0; Index < NumValues; ++Index) { - NumBits += MatchVBRBits(Record.Values[Index], DefaultVBRBits); + NumBits += matchVBRBits(Record.Values[Index], DefaultVBRBits); } return NumBits; } @@ -303,7 +303,7 @@ class BlockAbbrevs { // record. Sets NumBits to the number of bits the abbreviation will // generate. Note: Value of NumBits is undefined if this function // return false. 
- static bool CanUseAbbreviation(NaClBitcodeValues &Values, + static bool canUseAbbreviation(NaClBitcodeValues &Values, NaClBitCodeAbbrev *Abbrev, uint64_t &NumBits) { NumBits = 0; unsigned OpIndex = 0; @@ -314,7 +314,7 @@ class BlockAbbrevs { const NaClBitCodeAbbrevOp &Op = Abbrev->getOperandInfo(OpIndex); switch (Op.getEncoding()) { case NaClBitCodeAbbrevOp::Literal: - if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) { + if (canUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) { ++ValueIndex; ++OpIndex; continue; @@ -327,12 +327,12 @@ class BlockAbbrevs { Abbrev->getOperandInfo(OpIndex+1); // Add size of array. - NumBits += MatchVBRBits(Values.size()-ValueIndex, DefaultVBRBits); + NumBits += matchVBRBits(Values.size()-ValueIndex, DefaultVBRBits); // Add size of each field. for (; ValueIndex != ValueIndexEnd; ++ValueIndex) { uint64_t FieldBits=0; - if (!CanUseSimpleAbbrevOp(ElmtOp, Values[ValueIndex], FieldBits)) { + if (!canUseSimpleAbbrevOp(ElmtOp, Values[ValueIndex], FieldBits)) { return false; } NumBits += FieldBits; @@ -340,7 +340,7 @@ class BlockAbbrevs { return true; } default: { - if (CanUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) { + if (canUseSimpleAbbrevOp(Op, Values[ValueIndex], NumBits)) { ++ValueIndex; ++OpIndex; break; @@ -355,7 +355,7 @@ class BlockAbbrevs { // Returns true if the given abbreviation Op defines a single value, // and can be applied to the given Val. Adds the number of bits the // abbreviation Op will generate to NumBits if Op applies. 
- static bool CanUseSimpleAbbrevOp(const NaClBitCodeAbbrevOp &Op, + static bool canUseSimpleAbbrevOp(const NaClBitCodeAbbrevOp &Op, uint64_t Val, uint64_t &NumBits) { switch (Op.getEncoding()) { @@ -365,13 +365,13 @@ class BlockAbbrevs { return false; case NaClBitCodeAbbrevOp::Fixed: { uint64_t Width = Op.getValue(); - if (!MatchFixedBits(Val, Width)) + if (!matchFixedBits(Val, Width)) return false; NumBits += Width; return true; } case NaClBitCodeAbbrevOp::VBR: - if (unsigned Width = MatchVBRBits(Val, Op.getValue())) { + if (unsigned Width = matchVBRBits(Val, Op.getValue())) { NumBits += Width; return true; } else { @@ -387,7 +387,7 @@ class BlockAbbrevs { // Returns true if the given Val can be represented by abbreviation // operand Fixed(Width). - static bool MatchFixedBits(uint64_t Val, unsigned Width) { + static bool matchFixedBits(uint64_t Val, unsigned Width) { // Note: The reader only allows up to 32 bits for fixed values. if (Val & Mask32) return false; if (Val & ~(~0U >> (32-Width))) return false; @@ -397,7 +397,7 @@ class BlockAbbrevs { // Returns the number of bits needed to represent Val by abbreviation // operand VBR(Width). Note: Returns 0 if Val can't be represented // by VBR(Width). - static unsigned MatchVBRBits(uint64_t Val, unsigned Width) { + static unsigned matchVBRBits(uint64_t Val, unsigned Width) { if (Width == 0) return 0; unsigned NumBits = 0; while (1) { @@ -425,9 +425,9 @@ class BlockAbbrevs { // to a record. 
AbbrevLookupSizeMap LookupMap; - void PrintLookupMap(raw_ostream &Stream) { + void printLookupMap(raw_ostream &Stream) const { Stream << "------------------------------\n"; - Stream << "Block " << GetBlockID() << " abbreviation tries:\n"; + Stream << "Block " << getBlockID() << " abbreviation tries:\n"; bool IsFirstIteration = true; for (AbbrevLookupSizeMap::const_iterator Iter = LookupMap.begin(), IterEnd = LookupMap.end(); @@ -450,6 +450,17 @@ class BlockAbbrevs { typedef DenseMap<unsigned, BlockAbbrevs*> BlockAbbrevsMapType; typedef std::pair<unsigned, BlockAbbrevs*> BlockAbbrevsMapElmtType; +/// Gets the corresponding block abbreviations for a block ID. +static BlockAbbrevs *getAbbrevs(BlockAbbrevsMapType &AbbrevsMap, + unsigned BlockID) { + BlockAbbrevs *Abbrevs = AbbrevsMap[BlockID]; + if (Abbrevs == nullptr) { + Abbrevs = new BlockAbbrevs(BlockID); + AbbrevsMap[BlockID] = Abbrevs; + } + return Abbrevs; +} + /// Parses the bitcode file, analyzes it, and generates the /// corresponding lists of global abbreviations to use in the /// generated (compressed) bitcode file. @@ -503,7 +514,7 @@ class NaClBlockAnalyzeParser : public NaClBitcodeParser { NaClBlockAnalyzeParser(unsigned BlockID, NaClAnalyzeParser *Context) : NaClBitcodeParser(BlockID, Context), Context(Context) { - Init(); + init(); } virtual ~NaClBlockAnalyzeParser() { @@ -518,7 +529,7 @@ class NaClBlockAnalyzeParser : public NaClBitcodeParser { NaClBlockAnalyzeParser *EnclosingParser) : NaClBitcodeParser(BlockID, EnclosingParser), Context(EnclosingParser->Context) { - Init(); + init(); } public: @@ -534,18 +545,18 @@ class NaClBlockAnalyzeParser : public NaClBitcodeParser { if (Record.UsedAnAbbreviation()) { unsigned AbbrevIndex = Record.GetAbbreviationIndex(); if (LocalAbbrevBitstreamToInternalMap. - DefinesBitstreamAbbrevIndex(AbbrevIndex)) { + definesBitstreamAbbrevIndex(AbbrevIndex)) { Record.SetAbbreviationIndex( LocalAbbrevBitstreamToInternalMap. 
- GetInternalAbbrevIndex(AbbrevIndex)); + getInternalAbbrevIndex(AbbrevIndex)); } else { AbbrevBitstreamToInternalMap &GlobalAbbrevBitstreamToInternalMap = - GlobalBlockAbbrevs->GetGlobalAbbrevBitstreamToInternalMap(); + GlobalBlockAbbrevs->getGlobalAbbrevBitstreamToInternalMap(); if (GlobalAbbrevBitstreamToInternalMap. - DefinesBitstreamAbbrevIndex(AbbrevIndex)) { + definesBitstreamAbbrevIndex(AbbrevIndex)) { Record.SetAbbreviationIndex( GlobalAbbrevBitstreamToInternalMap. - GetInternalAbbrevIndex(AbbrevIndex)); + getInternalAbbrevIndex(AbbrevIndex)); } else { report_fatal_error("Bad abbreviation index in file"); } @@ -561,12 +572,12 @@ class NaClBlockAnalyzeParser : public NaClBitcodeParser { NaClBitCodeAbbrev *Abbrev, bool IsLocal) { int Index; - AddAbbreviation(BlockID, Abbrev->Simplify(), Index); + addAbbreviation(BlockID, Abbrev->Simplify(), Index); if (IsLocal) { - LocalAbbrevBitstreamToInternalMap.InstallNewBitstreamAbbrevIndex(Index); + LocalAbbrevBitstreamToInternalMap.installNewBitstreamAbbrevIndex(Index); } else { - GetGlobalAbbrevs(BlockID)->GetGlobalAbbrevBitstreamToInternalMap(). - InstallNewBitstreamAbbrevIndex(Index); + getGlobalAbbrevs(BlockID)->getGlobalAbbrevBitstreamToInternalMap(). + installNewBitstreamAbbrevIndex(Index); } } @@ -582,44 +593,35 @@ class NaClBlockAnalyzeParser : public NaClBitcodeParser { /// Returns the set of (global) block abbreviations defined for the /// given block ID. - BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) { - BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID]; - if (Abbrevs == 0) { - Abbrevs = new BlockAbbrevs(BlockID); - Context->BlockAbbrevsMap[BlockID] = Abbrevs; - } - return Abbrevs; - } - - void SetGlobalAbbrevs(unsigned BlockID, BlockAbbrevs *Abbrevs) { - Context->BlockAbbrevsMap[BlockID] = Abbrevs; + BlockAbbrevs *getGlobalAbbrevs(unsigned BlockID) { + return getAbbrevs(Context->BlockAbbrevsMap, BlockID); } // Adds the abbreviation to the list of abbreviations for the given // block. 
- void AddAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev, + void addAbbreviation(unsigned BlockID, NaClBitCodeAbbrev *Abbrev, int &Index) { // Get block abbreviations. - BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(BlockID); + BlockAbbrevs* Abbrevs = getGlobalAbbrevs(BlockID); // Read abbreviation and add as a global abbreviation. - if (Abbrevs->AddAbbreviation(Abbrev, Index) + if (Abbrevs->addAbbreviation(Abbrev, Index) && Context->Flags.TraceGeneratedAbbreviations) { - PrintAbbrev(errs(), BlockID, Abbrev); + printAbbrev(errs(), BlockID, Abbrev); } } /// Finds the index to the corresponding internal block abbreviation /// for the given abbreviation. - int FindAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { - return GetGlobalAbbrevs(BlockID)->FindAbbreviation(Abbrev); + int findAbbreviation(unsigned BlockID, const NaClBitCodeAbbrev *Abbrev) { + return getGlobalAbbrevs(BlockID)->findAbbreviation(Abbrev); } - void Init() { - GlobalBlockAbbrevs = GetGlobalAbbrevs(GetBlockID()); - LocalAbbrevBitstreamToInternalMap.SetNextBitstreamAbbrevIndex( + void init() { + GlobalBlockAbbrevs = getGlobalAbbrevs(GetBlockID()); + LocalAbbrevBitstreamToInternalMap.setNextBitstreamAbbrevIndex( GlobalBlockAbbrevs-> - GetGlobalAbbrevBitstreamToInternalMap().GetNextBitstreamAbbrevIndex()); + getGlobalAbbrevBitstreamToInternalMap().getNextBitstreamAbbrevIndex()); } }; @@ -663,12 +665,12 @@ class UnrolledAbbreviation { bool CanBeBigger = false) : CodeOp(0) { unsigned NextOp = 0; - UnrollAbbrevOp(CodeOp, Abbrev, NextOp); + unrollAbbrevOp(CodeOp, Abbrev, NextOp); --Size; for (unsigned i = 0; i < Size; ++i) { // Create slot and then fill with appropriate operator. 
AbbrevOps.push_back(CodeOp); - UnrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp); + unrollAbbrevOp(AbbrevOps[i], Abbrev, NextOp); } if (CanBeBigger) { for (; NextOp < Abbrev->getNumOperandInfos(); ++NextOp) { @@ -690,8 +692,8 @@ class UnrolledAbbreviation { /// Prints out the abbreviation modeled by the unrolled /// abbreviation. - void Print(raw_ostream &Stream) const { - NaClBitCodeAbbrev *Abbrev = Restore(false); + void print(raw_ostream &Stream) const { + NaClBitCodeAbbrev *Abbrev = restore(false); Abbrev->Print(Stream); Abbrev->dropRef(); } @@ -699,7 +701,7 @@ class UnrolledAbbreviation { /// Converts the unrolled abbreviation back into a regular /// abbreviation. If Simplify is true, we simplify the /// unrolled abbreviation as well. - NaClBitCodeAbbrev *Restore(bool Simplify=true) const { + NaClBitCodeAbbrev *restore(bool Simplify=true) const { NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev(); Abbrev->Add(CodeOp); for (std::vector<NaClBitCodeAbbrevOp>::const_iterator @@ -733,7 +735,7 @@ class UnrolledAbbreviation { // Extracts out the next abbreviation operator from the abbreviation // Abbrev, given the index NextOp, and assigns it to AbbrevOp - void UnrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp, + void unrollAbbrevOp(NaClBitCodeAbbrevOp &AbbrevOp, NaClBitCodeAbbrev *Abbrev, unsigned &NextOp) { assert(NextOp < Abbrev->getNumOperandInfos()); @@ -776,25 +778,25 @@ class CandBlockAbbrev { } /// The block ID of the candidate abbreviation. - unsigned GetBlockID() const { + unsigned getBlockID() const { return BlockID; } /// The abbreviation of the candidate abbreviation. - const NaClBitCodeAbbrev *GetAbbrev() const { + const NaClBitCodeAbbrev *getAbbrev() const { return Abbrev; } /// orders this against the candidate abbreviation. 
- int Compare(const CandBlockAbbrev &CandAbbrev) const { + int compare(const CandBlockAbbrev &CandAbbrev) const { unsigned diff = BlockID - CandAbbrev.BlockID; if (diff) return diff; return Abbrev->Compare(*CandAbbrev.Abbrev); } /// Prints the candidate abbreviation to the given stream. - void Print(raw_ostream &Stream) const { - PrintAbbrev(Stream, BlockID, Abbrev); + void print(raw_ostream &Stream) const { + printAbbrev(Stream, BlockID, Abbrev); } private: @@ -806,7 +808,7 @@ class CandBlockAbbrev { static inline bool operator<(const CandBlockAbbrev &A1, const CandBlockAbbrev &A2) { - return A1.Compare(A2) < 0; + return A1.compare(A2) < 0; } /// Models the set of candidate abbreviations being considered, and @@ -835,22 +837,22 @@ class CandidateAbbrevs { /// instances expected to use this candidate abbreviation. Returns /// true if the corresponding candidate abbreviation is added to this /// set of candidate abbreviations. - bool Add(unsigned BlockID, + bool add(unsigned BlockID, UnrolledAbbreviation &UnrolledAbbrev, unsigned NumInstances); /// Returns the list of candidate abbreviations in this set. - const AbbrevCountMap &GetAbbrevsMap() const { + const AbbrevCountMap &getAbbrevsMap() const { return AbbrevsMap; } /// Prints out the current contents of this set. 
- void Print(raw_ostream &Stream, const char *Title = "Candidates") const { + void print(raw_ostream &Stream, const char *Title = "Candidates") const { Stream << "-- " << Title << ": \n"; for (const_iterator Iter = AbbrevsMap.begin(), IterEnd = AbbrevsMap.end(); Iter != IterEnd; ++Iter) { Stream << format("%12u", Iter->second) << ": "; - Iter->first.Print(Stream); + Iter->first.print(Stream); } Stream << "--\n"; } @@ -863,13 +865,13 @@ class CandidateAbbrevs { BlockAbbrevsMapType &BlockAbbrevsMap; }; -bool CandidateAbbrevs::Add(unsigned BlockID, +bool CandidateAbbrevs::add(unsigned BlockID, UnrolledAbbreviation &UnrolledAbbrev, unsigned NumInstances) { // Drop if it corresponds to an existing global abbreviation. - NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.Restore(); + NaClBitCodeAbbrev *Abbrev = UnrolledAbbrev.restore(); if (BlockAbbrevs* Abbrevs = BlockAbbrevsMap[BlockID]) { - if (Abbrevs->FindAbbreviation(Abbrev) != + if (Abbrevs->findAbbreviation(Abbrev) != BlockAbbrevs::NO_SUCH_ABBREVIATION) { Abbrev->dropRef(); return false; @@ -892,7 +894,7 @@ bool CandidateAbbrevs::Add(unsigned BlockID, // values at Index are distributed. Any found abbreviations are added // to the candidate abbreviations CandAbbrevs. Returns true only if we // have added new candidate abbreviations to CandAbbrevs. 
-static bool AddNewAbbreviations(unsigned BlockID, +static bool addNewAbbreviations(unsigned BlockID, const UnrolledAbbreviation &Abbrev, unsigned Index, NaClBitcodeValueDist &ValueDist, @@ -921,7 +923,7 @@ static bool AddNewAbbreviations(unsigned BlockID, NaClBitcodeDistElement *Elmt = ValueDist.at(Range.first); UnrolledAbbreviation CandAbbrev(Abbrev); CandAbbrev.AbbrevOps[Index] = NaClBitCodeAbbrevOp(Range.first); - return CandAbbrevs.Add(BlockID, CandAbbrev, Elmt->GetNumInstances()); + return CandAbbrevs.add(BlockID, CandAbbrev, Elmt->GetNumInstances()); } return false; } @@ -931,7 +933,7 @@ static bool AddNewAbbreviations(unsigned BlockID, // corresponding distribution of value indices associated with the // abbreviation. Any found abbreviations are added to the candidate // abbreviations CandAbbrevs. -static void AddNewAbbreviations( +static void addNewAbbreviations( const NaClBitcodeCompressor::CompressFlags &Flags, unsigned BlockID, const UnrolledAbbreviation &Abbrev, NaClBitcodeDist &IndexDist, CandidateAbbrevs &CandAbbrevs) { @@ -944,7 +946,7 @@ static void AddNewAbbreviations( IndexIterEnd = IndexDistribution->end(); IndexIter != IndexIterEnd; ++IndexIter) { unsigned Index = static_cast<unsigned>(IndexIter->second); - if (AddNewAbbreviations( + if (addNewAbbreviations( BlockID, Abbrev, Index, cast<NaClBitcodeValueIndexDistElement>(IndexDist.at(Index)) ->GetValueDist(), @@ -959,7 +961,7 @@ static void AddNewAbbreviations( // SizeDist is the corresponding distribution of sizes associated with // the abbreviation. Any found abbreviations are added to the // candidate abbreviations CandAbbrevs. -static void AddNewAbbreviations( +static void addNewAbbreviations( const NaClBitcodeCompressor::CompressFlags &Flags, unsigned BlockID, NaClBitCodeAbbrev *Abbrev, unsigned Code, NaClBitcodeDist &SizeDist, CandidateAbbrevs &CandAbbrevs) { @@ -976,11 +978,11 @@ static void AddNewAbbreviations( // Try making the code a literal. 
UnrolledAbbreviation CandAbbrev(UnrolledAbbrev); CandAbbrev.CodeOp = NaClBitCodeAbbrevOp(Code); - CandAbbrevs.Add(BlockID, CandAbbrev, + CandAbbrevs.add(BlockID, CandAbbrev, SizeDist.at(Size)->GetNumInstances()); } // Now process value indices to find candidate abbreviations. - AddNewAbbreviations(Flags, BlockID, UnrolledAbbrev, + addNewAbbreviations(Flags, BlockID, UnrolledAbbrev, cast<NaClBitcodeSizeDistElement>(SizeDist.at(Size)) ->GetValueIndexDist(), CandAbbrevs); @@ -992,7 +994,7 @@ static void AddNewAbbreviations( // BlockID. AbbrevDist is the distribution map of abbreviations // associated with BlockID. Any found abbreviations are added to the // candidate abbreviations CandAbbrevs. -static void AddNewAbbreviations( +static void addNewAbbreviations( const NaClBitcodeCompressor::CompressFlags &Flags, unsigned BlockID, BlockAbbrevs *Abbrevs, NaClBitcodeDist &AbbrevDist, CandidateAbbrevs &CandAbbrevs) { @@ -1003,7 +1005,7 @@ static void AddNewAbbreviations( AbbrevIterEnd = AbbrevDistribution->end(); AbbrevIter != AbbrevIterEnd; ++AbbrevIter) { NaClBitcodeDistValue AbbrevIndex = AbbrevIter->second; - NaClBitCodeAbbrev *Abbrev = Abbrevs->GetIndexedAbbrev(AbbrevIndex); + NaClBitCodeAbbrev *Abbrev = Abbrevs->getIndexedAbbrev(AbbrevIndex); NaClBitcodeAbbrevDistElement *AbbrevElmt = cast<NaClBitcodeAbbrevDistElement>(AbbrevDist.at(AbbrevIndex)); NaClBitcodeDist &CodeDist = AbbrevElmt->GetCodeDist(); @@ -1015,7 +1017,7 @@ static void AddNewAbbreviations( CodeIterEnd = CodeDistribution->end(); CodeIter != CodeIterEnd; ++CodeIter) { unsigned Code = static_cast<unsigned>(CodeIter->second); - AddNewAbbreviations( + addNewAbbreviations( Flags, BlockID, Abbrev, Code, cast<NaClCompressCodeDistElement>(CodeDist.at(CodeIter->second)) ->GetSizeDist(), @@ -1030,7 +1032,7 @@ typedef std::pair<unsigned, CandBlockAbbrev> CountedAbbrevType; // BlockDist. BlockAbbrevsMap contains the set of read global // abbreviations. 
Adds found candidate abbreviations to the set of // known global abbreviations. -static void AddNewAbbreviations( +static void addNewAbbreviations( const NaClBitcodeCompressor::CompressFlags &Flags, NaClBitcodeBlockDist &BlockDist, BlockAbbrevsMapType &BlockAbbrevsMap) { CandidateAbbrevs CandAbbrevs(BlockAbbrevsMap); @@ -1041,7 +1043,7 @@ static void AddNewAbbreviations( Iter = Distribution->begin(), IterEnd = Distribution->end(); Iter != IterEnd; ++Iter) { unsigned BlockID = static_cast<unsigned>(Iter->second); - AddNewAbbreviations( + addNewAbbreviations( Flags, BlockID, BlockAbbrevsMap[BlockID], cast<NaClCompressBlockDistElement>(BlockDist.at(BlockID)) ->GetAbbrevDist(), @@ -1060,20 +1062,20 @@ static void AddNewAbbreviations( // to 1000 records. Assuming that both abbreviations shrink the // record by the same number of bits, we clearly want the tool to // choose the second abbreviation when selecting the abbreviation - // index to choose (via method GetRecordAbbrevIndex). + // index to choose (via method getRecordAbbrevIndex). // // Selecting the second is important in that abbreviation are // refined by successive calls to this tool. We do not want to // restrict downstream refinements prematurely. Hence, we want the // tool to choose the abbreviation with the most candidates first. // - // Since method GetRecordAbbrevIndex chooses the first abbreviation + // Since method getRecordAbbrevIndex chooses the first abbreviation // that generates the least number of bits, we clearly want to make // sure that the second abbreviation occurs before the first. 
std::vector<CountedAbbrevType> Candidates; for (CandidateAbbrevs::const_iterator - Iter = CandAbbrevs.GetAbbrevsMap().begin(), - IterEnd = CandAbbrevs.GetAbbrevsMap().end(); + Iter = CandAbbrevs.getAbbrevsMap().begin(), + IterEnd = CandAbbrevs.getAbbrevsMap().end(); Iter != IterEnd; ++Iter) { Candidates.push_back(CountedAbbrevType(Iter->second,Iter->first)); } @@ -1088,14 +1090,14 @@ static void AddNewAbbreviations( unsigned Min = (Iter->first >> 2); for (; Iter != IterEnd; ++Iter) { if (Iter->first < Min) break; - unsigned BlockID = Iter->second.GetBlockID(); + unsigned BlockID = Iter->second.getBlockID(); BlockAbbrevs *Abbrevs = BlockAbbrevsMap[BlockID]; - NaClBitCodeAbbrev *Abbrev = Iter->second.GetAbbrev()->Copy(); + NaClBitCodeAbbrev *Abbrev = Iter->second.getAbbrev()->Copy(); if (Flags.TraceGeneratedAbbreviations) { errs() <<format("%12u", Iter->first) << ": "; - PrintAbbrev(errs(), BlockID, Abbrev); + printAbbrev(errs(), BlockID, Abbrev); } - Abbrevs->AddAbbreviation(Abbrev); + Abbrevs->addAbbreviation(Abbrev); } if (Flags.TraceGeneratedAbbreviations) { errs() << "--\n"; @@ -1105,7 +1107,7 @@ static void AddNewAbbreviations( // Walks the block distribution (BlockDist), sorting entries based // on the distribution of blocks and abbreviations, and then // prints out the frequency of each abbreviation used. 
-static void DisplayAbbreviationFrequencies( +static void displayAbbreviationFrequencies( raw_ostream &Output, const NaClBitcodeBlockDist &BlockDist, const BlockAbbrevsMapType &BlockAbbrevsMap) { @@ -1133,7 +1135,7 @@ static void DisplayAbbreviationFrequencies( unsigned Count = AbbrevDist.at(Index)->GetNumInstances(); Output << format("%8u (%6.2f%%): ", Count, (double) Count/Total*100.0); - BlockPos->second->GetIndexedAbbrev(Index)->Print(Output); + BlockPos->second->getIndexedAbbrev(Index)->Print(Output); } } Output << '\n'; @@ -1144,7 +1146,7 @@ static void DisplayAbbreviationFrequencies( // to use, from memory buffer MemBuf containing the input bitcode file. // If ShowAbbreviationFrequencies or Flag.ShowValueDistributions are // defined, the corresponding results are printed to Output. -static bool AnalyzeBitcode( +static bool analyzeBitcode( const NaClBitcodeCompressor::CompressFlags &Flags, MemoryBuffer *MemBuf, raw_ostream &Output, @@ -1173,18 +1175,241 @@ static bool AnalyzeBitcode( } if (Flags.ShowAbbreviationFrequencies) - DisplayAbbreviationFrequencies(Output, Parser.BlockDist, BlockAbbrevsMap); + displayAbbreviationFrequencies(Output, Parser.BlockDist, BlockAbbrevsMap); if (Flags.ShowValueDistributions) Parser.BlockDist.Print(Output); - AddNewAbbreviations(Flags, Parser.BlockDist, BlockAbbrevsMap); + addNewAbbreviations(Flags, Parser.BlockDist, BlockAbbrevsMap); + return false; +} + +/// Models a queue of selected abbreviation indices, for each record +/// in all instances of a given block. Elements are added in the order +/// they appear in the bitcode file. +/// +/// The goal is to remove abbreviations that are not really used, from +/// the list of candidate abbreviations, reducing the number of +/// abbreviations needed. This is important because as the number of +/// abbreviations increases, the number of bits needed to store the +/// abbreviations also increases. 
By removing unnecessary +/// abbreviations, this improves the ability of this executable to +/// compress the bitcode file. +class SelectedAbbrevsQueue { + SelectedAbbrevsQueue(const SelectedAbbrevsQueue &) = delete; + SelectedAbbrevsQueue &operator=(const SelectedAbbrevsQueue &) = delete; + + // The minimum number of times an abbreviation must be used in the + // compressed version of the bitcode file, if it is going to be used + // at all. + static const uint32_t MinUsageCount = 5; + +public: + SelectedAbbrevsQueue() : AbbrevsIndexQueueFront(0) {} + + /// Adds the given selected abbreviation index to the end of the + /// queue. + void addIndex(unsigned Index) { AbbrevsIndexQueue.push_back(Index); } + + /// Removes the next selected abbreviation index from the + /// queue. + unsigned removeIndex() { + assert(AbbrevsIndexQueueFront < AbbrevsIndexQueue.size()); + return AbbrevsIndexQueue[AbbrevsIndexQueueFront++]; + } + + /// Takes the abbreviation indices on the queue, determines what + /// subset of abbreviations should be kept, and puts them on the + /// list of abbreviations defined by getKeptAbbrevs. Updates the + /// abbreviation idices on the queue to match the corresponding kept + /// abbreviations. + /// + /// Should be called after the last call to AddIndex, and before + /// calling removeIndex. + void installFrequentlyUsedAbbrevs(BlockAbbrevs *Abbrevs); + + /// Return the list of kept abbreviations, for the corresponding + /// block, in the order they should be defined. + const std::vector<NaClBitCodeAbbrev *> &getKeptAbbrevs() const { + return KeptAbbrevs; + } + + /// Returns the maximum abbreviation index used by the kept + /// abbreviations. + unsigned getMaxKeptAbbrevIndex() const { + return KeptAbbrevs.size() + naclbitc::DEFAULT_MAX_ABBREV; + } + +protected: + // Defines a queue of abbreviations indices using a + // vector. AbbrevsIndexQueueFront is used to point to the front of + // the queue. 
+ std::vector<unsigned> AbbrevsIndexQueue; + unsigned AbbrevsIndexQueueFront; + // The list of abbreviations that should be defined for the block, + // in the order they should be defined. + std::vector<NaClBitCodeAbbrev *> KeptAbbrevs; +}; + +void SelectedAbbrevsQueue::installFrequentlyUsedAbbrevs(BlockAbbrevs *Abbrevs) { + // Start by collecting usage counts for each abbreviation. + assert(AbbrevsIndexQueueFront == 0); + assert(KeptAbbrevs.empty()); + std::map<unsigned, uint32_t> UsageMap; + for (unsigned Index : AbbrevsIndexQueue) { + if (Index != naclbitc::UNABBREV_RECORD) + ++UsageMap[Index]; + } + + // Install results + std::map<unsigned, unsigned> KeepIndexMap; + for (const std::pair<unsigned, uint32_t> &Pair : UsageMap) { + if (Pair.second >= MinUsageCount) { + KeepIndexMap[Pair.first] = + KeptAbbrevs.size() + naclbitc::FIRST_APPLICATION_ABBREV; + KeptAbbrevs.push_back(Abbrevs->getIndexedAbbrev(Pair.first)); + } + } + + // Update the queue of selected abbreviation indices to match kept + // abbreviations. + for (unsigned &AbbrevIndex : AbbrevsIndexQueue) { + std::map<unsigned, unsigned>::const_iterator NewPos = + KeepIndexMap.find(AbbrevIndex); + AbbrevIndex = NewPos == KeepIndexMap.end() ? naclbitc::UNABBREV_RECORD + : NewPos->second; + } +} + +/// Models the kept queue of abbreviations associated with each block ID. 
+typedef std::map<unsigned, SelectedAbbrevsQueue *> BlockAbbrevsQueueMap; +typedef std::pair<unsigned, SelectedAbbrevsQueue *> BlockAbbrevsQueueMapElmt; + +/// Installs frequently used abbreviations for each of the blocks +/// defined in AbbrevsQueueMap, based on the abbreviations in the +/// corresponding AbbrevsMap +static void +installFrequentlyUsedAbbrevs(BlockAbbrevsMapType &AbbrevsMap, + BlockAbbrevsQueueMap &AbbrevsQueueMap) { + for (const BlockAbbrevsQueueMapElmt &Pair : AbbrevsQueueMap) { + if (SelectedAbbrevsQueue *SelectedAbbrevs = Pair.second) + SelectedAbbrevs->installFrequentlyUsedAbbrevs(AbbrevsMap[Pair.first]); + } +} + +/// Top level parser to queue selected abbreviation indices (within +/// the bitcode file), so that we can remove unused abbreviations from +/// the collected list of abbreviations before generating the final, +/// compressed bitcode file. +class NaClAssignAbbrevsParser : public NaClBitcodeParser { +public: + NaClAssignAbbrevsParser(NaClBitstreamCursor &Cursor, + BlockAbbrevsMapType &AbbrevsMap, + BlockAbbrevsQueueMap &AbbrevsQueueMap) + : NaClBitcodeParser(Cursor), AbbrevsMap(AbbrevsMap), + AbbrevsQueueMap(AbbrevsQueueMap) {} + + ~NaClAssignAbbrevsParser() final {} + + bool ParseBlock(unsigned BlockID) final; + + /// The abbreviations to use for the compressed bitcode. + BlockAbbrevsMapType &AbbrevsMap; + + /// The corresponding selected abbreviation indices to for each + /// block. + BlockAbbrevsQueueMap &AbbrevsQueueMap; +}; + +/// Block parser to queue abbreviation indices used by the records in +/// the block. +class NaClAssignAbbrevsBlockParser : public NaClBitcodeParser { +public: + NaClAssignAbbrevsBlockParser(unsigned BlockID, + NaClAssignAbbrevsParser *Context) + : NaClBitcodeParser(BlockID, Context), BlockID(BlockID), + Context(Context) { + init(); + } + + ~NaClAssignAbbrevsBlockParser() final {} + +protected: + unsigned BlockID; + NaClAssignAbbrevsParser *Context; + // Cached abbreviations defined for this block,. 
+ BlockAbbrevs *Abbreviations; + // Cached abbreviations queue to add abbreviation indices to. + SelectedAbbrevsQueue *Queue; + + NaClAssignAbbrevsBlockParser(unsigned BlockID, + NaClAssignAbbrevsBlockParser *EnclosingParser) + : NaClBitcodeParser(BlockID, EnclosingParser), BlockID(BlockID), + Context(EnclosingParser->Context) { + init(); + } + + void init() { + Abbreviations = getAbbrevs(Context->AbbrevsMap, BlockID); + Queue = Context->AbbrevsQueueMap[BlockID]; + if (Queue == nullptr) { + Queue = new SelectedAbbrevsQueue(); + Context->AbbrevsQueueMap[BlockID] = Queue; + } + } + + bool ParseBlock(unsigned BlockID) final { + NaClAssignAbbrevsBlockParser Parser(BlockID, this); + return Parser.ParseThisBlock(); + } + + void ProcessRecord() final { + // Find best fitting abbreviation to use, and save. + Queue->addIndex( + Abbreviations->getRecordAbbrevIndex(Record.GetRecordData())); + } +}; + +bool NaClAssignAbbrevsParser::ParseBlock(unsigned BlockID) { + NaClAssignAbbrevsBlockParser Parser(BlockID, this); + return Parser.ParseThisBlock(); +} + +// Reads the bitcode in MemBuf, using the abbreviations in AbbrevsMap, +// and queues the selected abbrevations for each record into +// AbbrevsQueueMap. +static bool chooseAbbrevs(MemoryBuffer *MemBuf, BlockAbbrevsMapType &AbbrevsMap, + BlockAbbrevsQueueMap &AbbrevsQueueMap) { + const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart(); + const unsigned char *EndBufPtr = BufPtr + MemBuf->getBufferSize(); + + // Read header. No verification is needed since AnalyzeBitcode has + // already checked it. + NaClBitcodeHeader Header; + if (Header.Read(BufPtr, EndBufPtr)) + return Error("Invalid PNaCl bitcode header"); + + // Create the bitcode reader. + NaClBitstreamReader StreamFile(BufPtr, EndBufPtr); + NaClBitstreamCursor Stream(StreamFile); + + // Set up the parser. + NaClAssignAbbrevsParser Parser(Stream, AbbrevsMap, AbbrevsQueueMap); + + // Parse the bitcode and choose abbreviations for records. 
+ while (!Stream.AtEndOfStream()) { + if (Parser.Parse()) { + installFrequentlyUsedAbbrevs(AbbrevsMap, AbbrevsQueueMap); + return true; + } + } + installFrequentlyUsedAbbrevs(AbbrevsMap, AbbrevsQueueMap); return false; } /// Parses the input bitcode file and generates the corresponding /// compressed bitcode file, by replacing abbreviations in the input /// file with the corresponding abbreviations defined in -/// BlockAbbrevsMap. +/// BlockAbbrevsMap using the selected abbreviations in AbbrevsQueueMap. class NaClBitcodeCopyParser : public NaClBitcodeParser { public: // Top-level constructor to build the appropriate block parser @@ -1192,21 +1417,24 @@ class NaClBitcodeCopyParser : public NaClBitcodeParser { NaClBitcodeCopyParser(const NaClBitcodeCompressor::CompressFlags &Flags, NaClBitstreamCursor &Cursor, BlockAbbrevsMapType &BlockAbbrevsMap, + BlockAbbrevsQueueMap &AbbrevsQueueMap, NaClBitstreamWriter &Writer) - : NaClBitcodeParser(Cursor), - Flags(Flags), - BlockAbbrevsMap(BlockAbbrevsMap), + : NaClBitcodeParser(Cursor), Flags(Flags), + BlockAbbrevsMap(BlockAbbrevsMap), AbbrevsQueueMap(AbbrevsQueueMap), Writer(Writer) {} virtual ~NaClBitcodeCopyParser() {} - virtual bool ParseBlock(unsigned BlockID); + bool ParseBlock(unsigned BlockID) final; const NaClBitcodeCompressor::CompressFlags &Flags; // The abbreviations to use for the copied bitcode. BlockAbbrevsMapType &BlockAbbrevsMap; + // Map defining the selected abbreviations for each block. + BlockAbbrevsQueueMap &AbbrevsQueueMap; + // The bitstream to copy the compressed bitcode into. NaClBitstreamWriter &Writer; }; @@ -1214,12 +1442,11 @@ class NaClBitcodeCopyParser : public NaClBitcodeParser { class NaClBlockCopyParser : public NaClBitcodeParser { public: // Top-level constructor to build the appropriate block parser. 
- NaClBlockCopyParser(unsigned BlockID, - NaClBitcodeCopyParser *Context) - : NaClBitcodeParser(BlockID, Context), - Context(Context), - BlockAbbreviations(0) - {} + NaClBlockCopyParser(unsigned BlockID, NaClBitcodeCopyParser *Context) + : NaClBitcodeParser(BlockID, Context), Context(Context), + BlockAbbreviations(nullptr), SelectedAbbrevs(nullptr) { + init(); + } virtual ~NaClBlockCopyParser() {} @@ -1231,24 +1458,31 @@ class NaClBlockCopyParser : public NaClBitcodeParser { // EnterBlock). BlockAbbrevs *BlockAbbreviations; + // The corresonding selected abbreviations. + SelectedAbbrevsQueue *SelectedAbbrevs; + /// Constructor to parse nested blocks. Creates a block parser to /// parse in a block with the given BlockID, and write the block /// back out using the abbreviations in BlockAbbrevsMap. NaClBlockCopyParser(unsigned BlockID, NaClBlockCopyParser *EnclosingParser) : NaClBitcodeParser(BlockID, EnclosingParser), - Context(EnclosingParser->Context) - {} + Context(EnclosingParser->Context), BlockAbbreviations(nullptr), + SelectedAbbrevs(nullptr) { + init(); + } + + void init() { + unsigned BlockID = GetBlockID(); + BlockAbbreviations = getGlobalAbbrevs(BlockID); + SelectedAbbrevs = Context->AbbrevsQueueMap[BlockID]; + assert(SelectedAbbrevs); + } /// Returns the set of (global) block abbreviations defined for the /// given block ID. - BlockAbbrevs *GetGlobalAbbrevs(unsigned BlockID) { - BlockAbbrevs *Abbrevs = Context->BlockAbbrevsMap[BlockID]; - if (Abbrevs == 0) { - Abbrevs = new BlockAbbrevs(BlockID); - Context->BlockAbbrevsMap[BlockID] = Abbrevs; - } - return Abbrevs; + BlockAbbrevs *getGlobalAbbrevs(unsigned BlockID) { + return getAbbrevs(Context->BlockAbbrevsMap, BlockID); } virtual bool ParseBlock(unsigned BlockID) { @@ -1257,14 +1491,13 @@ class NaClBlockCopyParser : public NaClBitcodeParser { } virtual void EnterBlock(unsigned NumWords) { - unsigned BlockID = GetBlockID(); - BlockAbbreviations = GetGlobalAbbrevs(BlockID); - // Enter the subblock. 
- NaClBitcodeSelectorAbbrev - Selector(BlockAbbreviations->GetNumberAbbreviations()-1); + NaClBitcodeSelectorAbbrev Selector( + SelectedAbbrevs->getMaxKeptAbbrevIndex()); if (Context->Flags.RemoveAbbreviations) Selector = NaClBitcodeSelectorAbbrev(); + + unsigned BlockID = GetBlockID(); Context->Writer.EnterSubblock(BlockID, Selector); if (BlockID != naclbitc::MODULE_BLOCK_ID @@ -1274,22 +1507,18 @@ class NaClBlockCopyParser : public NaClBitcodeParser { // To keep things simple, we dump all abbreviations immediately // inside the module block. Start by dumping module abbreviations // as local abbreviations. - BlockAbbrevs* Abbrevs = GetGlobalAbbrevs(naclbitc::MODULE_BLOCK_ID); - for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation(); - i < Abbrevs->GetNumberAbbreviations(); ++i) { - Context->Writer.EmitAbbrev(Abbrevs->GetIndexedAbbrev(i)->Copy()); + for (auto Abbrev : SelectedAbbrevs->getKeptAbbrevs()) { + Context->Writer.EmitAbbrev(Abbrev->Copy()); } // Insert the block info block, if needed, so that nested blocks // will have defined abbreviations. 
bool HasNonmoduleAbbrevs = false; - for (const BlockAbbrevsMapElmtType &Pair : Context->BlockAbbrevsMap) { - if (Pair.second == nullptr || - !Pair.second->hasApplicationAbbreviations()) continue; - if (Pair.first != naclbitc::MODULE_BLOCK_ID) { - HasNonmoduleAbbrevs = true; - break; - } + for (const BlockAbbrevsQueueMapElmt &Pair : Context->AbbrevsQueueMap) { + if (Pair.second->getKeptAbbrevs().empty()) + continue; + HasNonmoduleAbbrevs = true; + break; } if (!HasNonmoduleAbbrevs) return; @@ -1302,11 +1531,13 @@ class NaClBlockCopyParser : public NaClBitcodeParser { if (BlockID == naclbitc::MODULE_BLOCK_ID) continue; BlockAbbrevs *Abbrevs = Pair.second; - if (Abbrevs == nullptr) continue; - for (unsigned i = Abbrevs->GetFirstApplicationAbbreviation(); - i < Abbrevs->GetNumberAbbreviations(); ++i) { - Context->Writer.EmitBlockInfoAbbrev(BlockID, - Abbrevs->GetIndexedAbbrev(i)); + if (Abbrevs == nullptr) + continue; + if (SelectedAbbrevsQueue *SelectedAbbrevs = + Context->AbbrevsQueueMap[BlockID]) { + for (auto Abbrev : SelectedAbbrevs->getKeptAbbrevs()) { + Context->Writer.EmitBlockInfoAbbrev(BlockID, Abbrev->Copy()); + } } } Context->Writer.ExitBlock(); @@ -1323,9 +1554,8 @@ class NaClBlockCopyParser : public NaClBitcodeParser { return; } // Find best fitting abbreviation to use, and print out the record - // using that abbreviations. - unsigned AbbrevIndex = - BlockAbbreviations->GetRecordAbbrevIndex(Record.GetRecordData()); + // using that abbreviation. + unsigned AbbrevIndex = SelectedAbbrevs->removeIndex(); if (AbbrevIndex == naclbitc::UNABBREV_RECORD) { Context->Writer.EmitRecord(Record.GetCode(), Values, 0); } else { @@ -1342,13 +1572,13 @@ bool NaClBitcodeCopyParser::ParseBlock(unsigned BlockID) { // Read in bitcode, and write it back out using the abbreviations in // BlockAbbrevsMap, from memory buffer MemBuf containing the input // bitcode file. The bitcode is copied to Output. 
-static bool CopyBitcode(const NaClBitcodeCompressor::CompressFlags &Flags, - MemoryBuffer *MemBuf, - raw_ostream &Output, - BlockAbbrevsMapType &BlockAbbrevsMap) { +static bool copyBitcode(const NaClBitcodeCompressor::CompressFlags &Flags, + MemoryBuffer *MemBuf, raw_ostream &Output, + BlockAbbrevsMapType &BlockAbbrevsMap, + BlockAbbrevsQueueMap &AbbrevsQueueMap) { const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart(); - const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize(); + const unsigned char *EndBufPtr = BufPtr + MemBuf->getBufferSize(); // Read header. No verification is needed since AnalyzeBitcode has // already checked it. @@ -1362,35 +1592,37 @@ static bool CopyBitcode(const NaClBitcodeCompressor::CompressFlags &Flags, // Create the bitcode writer. SmallVector<char, 0> OutputBuffer; - OutputBuffer.reserve(256*1024); + OutputBuffer.reserve(256 * 1024); NaClBitstreamWriter StreamWriter(OutputBuffer); // Emit the file header. NaClWriteHeader(Header, StreamWriter); // Set up the parser. - NaClBitcodeCopyParser Parser(Flags, Stream, BlockAbbrevsMap, StreamWriter); + NaClBitcodeCopyParser Parser(Flags, Stream, BlockAbbrevsMap, AbbrevsQueueMap, + StreamWriter); // Parse the bitcode and copy. while (!Stream.AtEndOfStream()) { - if (Parser.Parse()) return true; + if (Parser.Parse()) + return true; } // Write out the copied results. - Output.write((char*)&OutputBuffer.front(), OutputBuffer.size()); + Output.write((char *)&OutputBuffer.front(), OutputBuffer.size()); return false; } // Build fast lookup abbreviation maps for each of the abbreviation blocks // defined in AbbrevsMap. 
-static void BuildAbbrevLookupMaps( +static void buildAbbrevLookupMaps( const NaClBitcodeCompressor::CompressFlags &Flags, BlockAbbrevsMapType &AbbrevsMap) { for (BlockAbbrevsMapType::const_iterator Iter = AbbrevsMap.begin(), IterEnd = AbbrevsMap.end(); Iter != IterEnd; ++Iter) { - Iter->second->BuildAbbrevLookupSizeMap(Flags); + Iter->second->buildAbbrevLookupSizeMap(Flags); } } @@ -1398,15 +1630,23 @@ static void BuildAbbrevLookupMaps( bool NaClBitcodeCompressor::analyze(MemoryBuffer *MemBuf, raw_ostream &Output) { BlockAbbrevsMapType BlockAbbrevsMap; - return !AnalyzeBitcode(Flags, MemBuf, Output, BlockAbbrevsMap); + return !analyzeBitcode(Flags, MemBuf, Output, BlockAbbrevsMap); } bool NaClBitcodeCompressor::compress(MemoryBuffer *MemBuf, raw_ostream &BitcodeOutput, raw_ostream &ShowOutput) { BlockAbbrevsMapType BlockAbbrevsMap; - if (AnalyzeBitcode(Flags, MemBuf, ShowOutput, BlockAbbrevsMap)) return false; - BuildAbbrevLookupMaps(Flags, BlockAbbrevsMap); - if (CopyBitcode(Flags, MemBuf, BitcodeOutput, BlockAbbrevsMap)) return false; - return true; + if (analyzeBitcode(Flags, MemBuf, ShowOutput, BlockAbbrevsMap)) + return false; + buildAbbrevLookupMaps(Flags, BlockAbbrevsMap); + BlockAbbrevsQueueMap AbbrevsQueueMap; + bool Result = true; + if (chooseAbbrevs(MemBuf, BlockAbbrevsMap, AbbrevsQueueMap)) + Result = false; + else if (copyBitcode(Flags, MemBuf, BitcodeOutput, BlockAbbrevsMap, + AbbrevsQueueMap)) + Result = false; + DeleteContainerSeconds(AbbrevsQueueMap); + return Result; } From 9c6f288f182d41dfccc757299ed0db0104366757 Mon Sep 17 00:00:00 2001 From: JF Bastien <jfb@chromium.org> Date: Wed, 25 Mar 2015 15:52:12 -0700 Subject: [PATCH 08/16] Avoid generating register-register load on ARM LLVM was generating register-register loads on ARM (rejected by the sandbox) when doing bswap16 because the pattern that combines the load wasn't predicated on NaCl. This patch adds this predication so that NaCl doesn't generate that pattern. 
The LLVM pattern was added in the following patch: https://llvm.org/svn/llvm-project/llvm/trunk@208620 BUG= https://code.google.com/p/chromium/issues/detail?id=460432 R=jvoung@chromium.org TEST= make check Review URL: https://codereview.chromium.org/1008233005 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 12 ++++---- lib/Target/ARM/ARMISelLowering.cpp | 8 ++--- lib/Target/ARM/ARMInstrInfo.td | 7 +++-- test/CodeGen/ARM/atomic-16bit-nacl.ll | 24 +++++++++++++++ test/CodeGen/ARM/bswap16.ll | 42 +++++++++++++++++++++++++++ 5 files changed, 81 insertions(+), 12 deletions(-) create mode 100644 test/CodeGen/ARM/atomic-16bit-nacl.ll diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index ada60bfd071..e0f1facac5a 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -689,8 +689,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op, SDValue &Opc) { // @LOCALMOD-START // Avoid two reg addressing mode for loads and stores - const bool restrict_addressing_modes_for_nacl = Subtarget->isTargetNaCl() && - (Op->getOpcode() == ISD::LOAD || Op->getOpcode() == ISD::STORE); + const bool restrict_addressing_modes_for_nacl = + Subtarget->isTargetNaCl() && isa<MemSDNode>(Op); // This is neither a sandboxable load nor a sandboxable store. 
if (!restrict_addressing_modes_for_nacl) { // @LOCALMOD-END @@ -861,8 +861,8 @@ bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, // @LOCALMOD-BEGIN // Avoid two reg addressing mode for loads and stores - const bool restrict_addressing_modes_for_nacl = Subtarget->isTargetNaCl() && - (Op->getOpcode() == ISD::LOAD || Op->getOpcode() == ISD::STORE); + const bool restrict_addressing_modes_for_nacl = + Subtarget->isTargetNaCl() && isa<MemSDNode>(Op); // @LOCALMOD-END Offset = N; @@ -945,8 +945,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Opc) { // @LOCALMOD-START // Avoid two reg addressing mode for loads and stores - const bool restrict_addressing_modes_for_nacl = Subtarget->isTargetNaCl() && - (Op->getOpcode() == ISD::LOAD || Op->getOpcode() == ISD::STORE); + const bool restrict_addressing_modes_for_nacl = + Subtarget->isTargetNaCl() && isa<MemSDNode>(Op); if (!restrict_addressing_modes_for_nacl) { // @LOCALMOD-END if (N.getOpcode() == ISD::SUB) { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index dc22781bfb3..c149f8bc8a8 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -10522,8 +10522,8 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, // @LOCALMOD-START // Avoid two reg addressing mode for loads and stores - const bool restrict_addressing_modes_for_nacl = Subtarget->isTargetNaCl() && - (N->getOpcode() == ISD::LOAD || N->getOpcode() == ISD::STORE); + const bool restrict_addressing_modes_for_nacl = + Subtarget->isTargetNaCl() && isa<MemSDNode>(N); if (restrict_addressing_modes_for_nacl) { return false; } @@ -10569,8 +10569,8 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return false; // @LOCALMOD-START // Avoid two reg addressing mode for loads and stores - const bool restrict_addressing_modes_for_nacl = Subtarget->isTargetNaCl() && - (N->getOpcode() == ISD::LOAD || N->getOpcode() == 
ISD::STORE); + const bool restrict_addressing_modes_for_nacl = + Subtarget->isTargetNaCl() && isa<MemSDNode>(N); if (restrict_addressing_modes_for_nacl) { return false; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 703e1e776e0..ce166169167 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -276,6 +276,7 @@ def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">; def IsMachO : Predicate<"Subtarget->isTargetMachO()">; def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; +def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; // @LOCALMOD def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, AssemblerPredicate<"FeatureNaClTrap", "NaCl">; def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">; @@ -4306,9 +4307,11 @@ def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), Sched<[WriteALU]>; def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 16)), - (REV16 (LDRH addrmode3:$addr))>; + (REV16 (LDRH addrmode3:$addr))>, + Requires<[IsARM, HasV6, IsNotNaCl]>; // @LOCALMOD def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr), - (STRH (REV16 GPR:$Rn), addrmode3:$addr)>; + (STRH (REV16 GPR:$Rn), addrmode3:$addr)>, + Requires<[IsARM, HasV6, IsNotNaCl]>; // @LOCALMOD let AddedComplexity = 5 in def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), diff --git a/test/CodeGen/ARM/atomic-16bit-nacl.ll b/test/CodeGen/ARM/atomic-16bit-nacl.ll new file mode 100644 index 00000000000..6c3c603973c --- /dev/null +++ b/test/CodeGen/ARM/atomic-16bit-nacl.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=armv7 < %s | FileCheck %s +; RUN: llc -mtriple=armv7-unknown-nacl < %s | FileCheck %s -check-prefix=NACL + +; Make sure NaCl doesn't generate register-register loads, which are disallowed +; by its sandbox. 
+ +@i16_large = internal global [516 x i8] undef + +define void @test() { + %i = ptrtoint [516 x i8]* @i16_large to i32 + %a = add i32 %i, 512 + %a.asptr = inttoptr i32 %a to i16* + %loaded = load atomic i16* %a.asptr seq_cst, align 2 + ret void +} +; CHECK-LABEL: test: +; CHECK: ldrh r0, [r1, r0] + +; NACL-LABEL: test: +; NACL: movw r0, :lower16:i16_large +; NACL: movt r0, :upper16:i16_large +; NACL: add r0, r0, #512 +; NACL: ldrh r0, [r0] +; NACL: dmb ish diff --git a/test/CodeGen/ARM/bswap16.ll b/test/CodeGen/ARM/bswap16.ll index 70c62d294ee..99a26b3e2db 100644 --- a/test/CodeGen/ARM/bswap16.ll +++ b/test/CodeGen/ARM/bswap16.ll @@ -1,6 +1,9 @@ ; RUN: llc -mtriple=arm-darwin -mattr=v6 < %s | FileCheck %s ; RUN: llc -mtriple=thumb-darwin -mattr=v6 < %s | FileCheck %s +; @LOCALMOD Change to test that NaCl doesn't merge the pattern. +; RUN: llc -mtriple=armv7-unknown-nacl < %s | FileCheck %s -check-prefix=NACL + define void @test1(i16* nocapture %data) { entry: @@ -13,6 +16,12 @@ entry: ; CHECK: ldrh r[[R1:[0-9]+]], [r0] ; CHECK: rev16 r[[R1]], r[[R1]] ; CHECK: strh r[[R1]], [r0] + + ; NACL-LABEL: test1: + ; NACL: ldrh r[[R1:[0-9]+]], [r0] + ; NACL: rev r[[R1]], r[[R1]] + ; NACL: lsr r[[R1]], r[[R1]], #16 + ; NACL: strh r[[R1]], [r0] } @@ -25,6 +34,10 @@ entry: ; CHECK-LABEL: test2: ; CHECK: rev16 r[[R1:[0-9]+]], r1 ; CHECK: strh r[[R1]], [r0] + + ; NACL-LABEL: test2: + ; NACL: rev16 r[[R1:[0-9]+]], r1 + ; NACL: strh r[[R1]], [r0] } @@ -37,6 +50,35 @@ entry: ; CHECK-LABEL: test3: ; CHECK: ldrh r[[R0:[0-9]+]], [r0] ; CHECK: rev16 r[[R0]], r0 + + ; NACL-LABEL: test3: + ; NACL: ldrh r[[R0:[0-9]+]], [r0] + ; NACL: rev r[[R0]], r0 + ; NACL: lsr r[[R0]], r0, #16 +} + +define i16 @test4(i32 %in) { + %1 = add i32 %in, 256 + %2 = inttoptr i32 %1 to i32* + %3 = load i32* %2, align 2 + %4 = trunc i32 %3 to i16 + %5 = tail call i16 @llvm.bswap.i16(i16 %4) + %6 = add i32 %in, 258 + %7 = inttoptr i32 %6 to i16* + store i16 %5, i16* %7, align 2 + ret i16 %5 + + ; CHECK-LABEL: 
test4: + ; CHECK: ldrh r[[R1:[0-9]+]], [r0, r1] + ; CHECK: rev16 r[[R2:[0-9]+]], r[[R1]] + ; CHECK: strh r[[R2]], [r0, r3] + + ; NACL-LABEL: test4: + ; NACL: add r[[R0:[0-9]+]], r0, #256 + ; NACL: ldrh r[[R0]], [r0] + ; NACL: rev r[[R0]], r[[R0]] + ; NACL: lsr r[[R0]], r[[R0]], #16 + ; NACL: strh r[[R0]], [r1] } declare i16 @llvm.bswap.i16(i16) From 7acbb4c2014c255f56175eba6b63878ad2e59ebf Mon Sep 17 00:00:00 2001 From: JF Bastien <jfb@chromium.org> Date: Wed, 25 Mar 2015 18:27:10 -0700 Subject: [PATCH 09/16] PNaCl: Rewrite @llvm.assume calls to nothing. @llvm.assume calls can safely be removed. R=jfb@chromium.org TEST= (make -C toolchain_build/out/llvm_x86_64_linux_work -j `nproc` check) BUG= Review URL: https://codereview.chromium.org/1029273002 --- lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp | 4 ++- test/Transforms/NaCl/rewrite-assume.ll | 35 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/NaCl/rewrite-assume.ll diff --git a/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp b/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp index a11dd0f5b09..5b3cb3b270d 100644 --- a/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp +++ b/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp @@ -115,8 +115,10 @@ bool RewriteLLVMIntrinsics::runOnModule(Module &M) { // Remove all @llvm.prefetch intrinsics. 
ToNothing PrefetchRewriter(M, Intrinsic::prefetch); + ToNothing AssumeRewriter(M, Intrinsic::assume); - return visitUses(FltRoundsRewriter) | visitUses(PrefetchRewriter); + return visitUses(FltRoundsRewriter) | visitUses(PrefetchRewriter) + | visitUses(AssumeRewriter); } bool RewriteLLVMIntrinsics::visitUses(IntrinsicRewriter &Rewriter) { diff --git a/test/Transforms/NaCl/rewrite-assume.ll b/test/Transforms/NaCl/rewrite-assume.ll new file mode 100644 index 00000000000..50e5d2bb6ff --- /dev/null +++ b/test/Transforms/NaCl/rewrite-assume.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s -check-prefix=CLEANED +; Test the @llvm.assume part of the RewriteLLVMIntrinsics pass + +declare void @llvm.assume(i1) + +; No declaration or definition of llvm.assume() should remain. +; CLEANED-NOT: @llvm.assume + +define void @call_assume(i1 %val) { +; CHECK: call_assume +; CHECK-NEXT: ret void + call void @llvm.assume(i1 %val) + ret void +} + +; A more complex example with a number of calls in several BBs. +define void @multiple_calls(i1 %val) { +; CHECK: multiple_calls +entryblock: +; CHECK: entryblock +; CHECK-NEXT: br + call void @llvm.assume(i1 %val) + br i1 %val, label %exitblock, label %never +never: +; CHECK: never: +; CHECK-NEXT: br + call void @llvm.assume(i1 %val) + br label %exitblock +exitblock: +; CHECK: exitblock: +; CHECK-NEXT: ret void + call void @llvm.assume(i1 %val) + ret void +} From bf706a52911ea12b388504e5d7095118ad338376 Mon Sep 17 00:00:00 2001 From: JF Bastien <jfb@chromium.org> Date: Sun, 29 Mar 2015 10:17:17 -0700 Subject: [PATCH 10/16] PNaCl: Strip `nonnull` attributes from pointer arguments. 
R=jfb@chromium.org TEST= (make -C toolchain_build/out/llvm_x86_64_linux_work -j `nproc` check) BUG= Review URL: https://codereview.chromium.org/1042643002 --- lib/Transforms/NaCl/ReplacePtrsWithInts.cpp | 2 ++ test/Transforms/NaCl/replace-ptrs-with-ints.ll | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp b/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp index d1d2d179365..53f0d1d809e 100644 --- a/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp +++ b/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp @@ -331,6 +331,8 @@ static AttributeSet RemovePointerAttrs(LLVMContext &Context, case Attribute::NoAlias: case Attribute::ReadNone: case Attribute::ReadOnly: + case Attribute::NonNull: + case Attribute::Dereferenceable: break; default: AB.addAttribute(*Attr); diff --git a/test/Transforms/NaCl/replace-ptrs-with-ints.ll b/test/Transforms/NaCl/replace-ptrs-with-ints.ll index aaba9da0b7d..9d2974f4c7f 100644 --- a/test/Transforms/NaCl/replace-ptrs-with-ints.ll +++ b/test/Transforms/NaCl/replace-ptrs-with-ints.ll @@ -490,6 +490,16 @@ define void @readonly_readnone(i8* readonly readnone) { } ; CHECK-LABEL: define void @readonly_readnone(i32) +define nonnull i8* @nonnull_ptr(i8* nonnull) { + ret i8* undef +} +; CHECK-LABEL: define i32 @nonnull_ptr(i32) + +define dereferenceable(16) i8* @dereferenceable_ptr(i8* dereferenceable(8)) { + ret i8* undef +} +; CHECK-LABEL: define i32 @dereferenceable_ptr(i32) + ; "nounwind" should be preserved. 
define void @nounwind_func_attr() nounwind { ret void From ffdd8da4cf6d29ccf734253e147738e155b05182 Mon Sep 17 00:00:00 2001 From: JF Bastien <jfb@chromium.org> Date: Mon, 30 Mar 2015 08:49:12 -0700 Subject: [PATCH 11/16] Emscripten: do not simplify varargs calls to jsargs, exception and resume intrinsics See: https://github.com/kripken/emscripten-fastcomp/commit/7216560b2f1e66a7c9bb9a1b344ae6e9f0f89b7b https://github.com/kripken/emscripten-fastcomp/commit/daf116d3a6d43e8e4ae3f10e75181fd59e2e62a6 https://github.com/kripken/emscripten-fastcomp/commit/59962e958c83ac9427723a365751405212b3ffb3 R=dschuff@chromium.org, azakai@mozilla.com BUG= https://code.google.com/p/nativeclient/issues/detail?id=4102 TEST= make check Review URL: https://codereview.chromium.org/1042043002 --- lib/Transforms/NaCl/ExpandVarArgs.cpp | 36 ++++++++++++++----- .../NaCl/expand-varargs-emscripten.ll | 28 +++++++++++++++ 2 files changed, 55 insertions(+), 9 deletions(-) create mode 100644 test/Transforms/NaCl/expand-varargs-emscripten.ll diff --git a/lib/Transforms/NaCl/ExpandVarArgs.cpp b/lib/Transforms/NaCl/ExpandVarArgs.cpp index a7b677f16c2..6e2824684d8 100644 --- a/lib/Transforms/NaCl/ExpandVarArgs.cpp +++ b/lib/Transforms/NaCl/ExpandVarArgs.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -45,7 +46,22 @@ INITIALIZE_PASS(ExpandVarArgs, "expand-varargs", "Expand out variable argument function definitions and calls", false, false) -static void ExpandVarArgFunc(Function *Func) { +static bool isEmscriptenJSArgsFunc(Module *M, StringRef Name) { + // TODO(jfb) Make these intrinsics in clang and remove the assert: these + // intrinsics should only exist for Emscripten. 
+ bool isEmscriptenSpecial = Name.equals("emscripten_asm_const_int") || + Name.equals("emscripten_asm_const_double") || + Name.equals("emscripten_landingpad") || + Name.equals("emscripten_resume"); + assert(isEmscriptenSpecial ? Triple(M->getTargetTriple()).isOSEmscripten() + : true); + return isEmscriptenSpecial; +} + +static bool ExpandVarArgFunc(Module *M, Function *Func) { + if (isEmscriptenJSArgsFunc(M, Func->getName())) + return false; + Type *PtrType = Type::getInt8PtrTy(Func->getContext()); FunctionType *FTy = Func->getFunctionType(); @@ -84,6 +100,8 @@ static void ExpandVarArgFunc(Function *Func) { } } } + + return true; } static void ExpandVAArgInst(VAArgInst *Inst, DataLayout *DL) { @@ -146,14 +164,16 @@ static void ExpandVACopyInst(VACopyInst *Inst) { // ExpandVarArgCall() converts a CallInst or InvokeInst to expand out // of varargs. It returns whether the module was modified. template <class InstType> -static bool ExpandVarArgCall(InstType *Call, DataLayout *DL) { +static bool ExpandVarArgCall(Module *M, InstType *Call, DataLayout *DL) { FunctionType *FuncType = cast<FunctionType>( Call->getCalledValue()->getType()->getPointerElementType()); if (!FuncType->isFunctionVarArg()) return false; + if (auto *F = dyn_cast<Function>(Call->getCalledValue())) + if (isEmscriptenJSArgsFunc(M, F->getName())) + return false; Function *F = Call->getParent()->getParent(); - Module *M = F->getParent(); LLVMContext &Ctx = M->getContext(); SmallVector<AttributeSet, 8> Attrs; @@ -284,17 +304,15 @@ bool ExpandVarArgs::runOnModule(Module &M) { Changed = true; ExpandVACopyInst(VAC); } else if (auto *Call = dyn_cast<CallInst>(I)) { - Changed |= ExpandVarArgCall(Call, &DL); + Changed |= ExpandVarArgCall(&M, Call, &DL); } else if (auto *Call = dyn_cast<InvokeInst>(I)) { - Changed |= ExpandVarArgCall(Call, &DL); + Changed |= ExpandVarArgCall(&M, Call, &DL); } } } - if (F->isVarArg()) { - Changed = true; - ExpandVarArgFunc(F); - } + if (F->isVarArg()) + Changed |= 
ExpandVarArgFunc(&M, F); } return Changed; diff --git a/test/Transforms/NaCl/expand-varargs-emscripten.ll b/test/Transforms/NaCl/expand-varargs-emscripten.ll new file mode 100644 index 00000000000..261ff94b74f --- /dev/null +++ b/test/Transforms/NaCl/expand-varargs-emscripten.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -mtriple=asmjs-unknown-emscripten -expand-varargs -S | FileCheck %s + +target datalayout = "p:32:32:32" + +%va_list = type i8* + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) +declare void @llvm.va_copy(i8*, i8*) + +declare void @emscripten_asm_const_int(...) +declare void @emscripten_asm_const_double(...) +declare void @emscripten_landingpad(...) +declare void @emscripten_resume(...) + +define void @test(i32 %arg) { + call void (...)* @emscripten_asm_const_int(i32 %arg) + call void (...)* @emscripten_asm_const_double(i32 %arg) + call void (...)* @emscripten_landingpad(i32 %arg) + call void (...)* @emscripten_resume(i32 %arg) + ret void +} +; CHECK-LABEL: define void @test( +; CHECK-NEXT: call void (...)* @emscripten_asm_const_int(i32 %arg) +; CHECK-NEXT: call void (...)* @emscripten_asm_const_double(i32 %arg) +; CHECK-NEXT: call void (...)* @emscripten_landingpad(i32 %arg) +; CHECK-NEXT: call void (...)* @emscripten_resume(i32 %arg) +; CHECK-NEXT: ret void From e5cec77040ca489ed170ca77feaca47247e52dfd Mon Sep 17 00:00:00 2001 From: Derek Schuff <dschuff@chromium.org> Date: Mon, 30 Mar 2015 15:19:24 -0700 Subject: [PATCH 12/16] Cherrypick upstream r233321 We upstreamed our Triple localmod for ARM in r233321. Remove the LOCALMODs and update to the upstream version. 
R=jvoung@chromium.org BUG=None Review URL: https://codereview.chromium.org/1048993002 --- lib/Support/Triple.cpp | 9 ++------- unittests/ADT/TripleTest.cpp | 4 ++++ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index d436b493e85..b026084ef38 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -1011,13 +1011,6 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const { case llvm::Triple::Win32: // FIXME: this is invalid for WindowsCE return "cortex-a9"; - // @LOCALMOD-START - case llvm::Triple::NaCl: - // Default to armv7 unless something more specific is specified. - if (MArch == "arm") - return "cortex-a9"; - break; - // @LOCALMOD-END default: break; } @@ -1076,6 +1069,8 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const { default: return "strongarm"; } + case llvm::Triple::NaCl: + return "cortex-a8"; default: switch (getEnvironment()) { case llvm::Triple::EABIHF: diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp index cacbde66f35..6dc9b6d01e7 100644 --- a/unittests/ADT/TripleTest.cpp +++ b/unittests/ADT/TripleTest.cpp @@ -663,5 +663,9 @@ TEST(TripleTest, getARMCPUForArch) { EXPECT_STREQ("cortex-a8", Triple.getARMCPUForArch()); EXPECT_STREQ("swift", Triple.getARMCPUForArch("armv7s")); } + { + llvm::Triple Triple("arm--nacl"); + EXPECT_STREQ("cortex-a8", Triple.getARMCPUForArch("arm")); + } } } From cf83520bc939312a50a9f27fdc5240617392bfe8 Mon Sep 17 00:00:00 2001 From: JF Bastien <jfb@chromium.org> Date: Mon, 30 Mar 2015 15:32:48 -0700 Subject: [PATCH 13/16] Emscripten global cleanup: keep llvm.used Emscripten relies on llvm.used to figure out what's exported: https://github.com/kripken/emscripten-fastcomp/commit/91288b653d28c8fe4aeb69b06ce82d75646e72a7#diff-a7c3065843b374b0a91b623f66f6eb0a This patch also fixes the initialization of the globalcleanup.ll test: llvm.used can't use zeroinitializer (see test/Verifier/llvm.used-invalid-init.ll). 
R=dschuff@chromium.org, azakai@mozilla.com BUG= https://code.google.com/p/nativeclient/issues/detail?id=4102 TEST= make check Review URL: https://codereview.chromium.org/1048983003 --- lib/Transforms/NaCl/GlobalCleanup.cpp | 22 +++++++++++++++------- test/Transforms/NaCl/globalcleanup.ll | 11 +++++++++-- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/lib/Transforms/NaCl/GlobalCleanup.cpp b/lib/Transforms/NaCl/GlobalCleanup.cpp index d489fefc1dc..dffdc2c6caf 100644 --- a/lib/Transforms/NaCl/GlobalCleanup.cpp +++ b/lib/Transforms/NaCl/GlobalCleanup.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" @@ -72,25 +73,32 @@ static bool CleanUpLinkage(GlobalValue *GV) { bool GlobalCleanup::runOnModule(Module &M) { bool Modified = false; - if (GlobalVariable *GV = M.getNamedGlobal("llvm.compiler.used")) { + // TODO(jfb) Emscripten's JSBackend relies on llvm.used to figure out what's + // exported. Should it instead rely on visibility attributes? 
+ bool keepLLVMUsed = Triple(M.getTargetTriple()).isOSEmscripten(); + + if (auto *GV = M.getNamedGlobal("llvm.compiler.used")) { GV->eraseFromParent(); Modified = true; } - if (GlobalVariable *GV = M.getNamedGlobal("llvm.used")) { - GV->eraseFromParent(); - Modified = true; + + if (!keepLLVMUsed) { + if (auto *GV = M.getNamedGlobal("llvm.used")) { + GV->eraseFromParent(); + Modified = true; + } } - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ) { + for (auto I = M.global_begin(), E = M.global_end(); I != E;) { GlobalVariable *GV = I++; Modified |= CleanUpLinkage(GV); } - for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { + for (auto I = M.begin(), E = M.end(); I != E;) { Function *F = I++; Modified |= CleanUpLinkage(F); } + return Modified; } diff --git a/test/Transforms/NaCl/globalcleanup.ll b/test/Transforms/NaCl/globalcleanup.ll index 5f976e22da6..3bb469364c9 100644 --- a/test/Transforms/NaCl/globalcleanup.ll +++ b/test/Transforms/NaCl/globalcleanup.ll @@ -1,8 +1,15 @@ ; RUN: opt < %s -nacl-global-cleanup -S | FileCheck %s ; RUN: opt < %s -nacl-global-cleanup -S | FileCheck -check-prefix=GV %s +; RUN: opt < %s -nacl-global-cleanup -mtriple=asmjs-unknown-emscripten -S | FileCheck -check-prefix=EM %s -@llvm.compiler.used = appending global [0 x i8*] zeroinitializer, section "llvm.metadata" -@llvm.used = appending global [0 x i8*] zeroinitializer, section "llvm.metadata" +@a = global i8 42 + +@llvm.compiler.used = appending global [1 x i8*] [i8* @a], section "llvm.metadata" +@llvm.used = appending global [1 x i8*] [i8* @a], section "llvm.metadata" + +; Emscripten preserves llvm.used but not llvm.compiler.used. 
+; EM-NOT: llvm.compiler.used +; EM: llvm.used ; GV-NOT: llvm.used ; GV-NOT: llvm.compiler.used From 2853e8fd2641d0e758d7fa860e6cd678185ca53c Mon Sep 17 00:00:00 2001 From: JF Bastien <jfb@chromium.org> Date: Mon, 30 Mar 2015 15:33:17 -0700 Subject: [PATCH 14/16] Remove redundant dead inst elimination Post-opt passes were running dead inst elimination as well as dead code elimination, which is strictly superior to dead inst elimination. R=dschuff@chromium.org BUG= none TEST= make check Review URL: https://codereview.chromium.org/1045043003 --- lib/Transforms/NaCl/PNaClABISimplify.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/Transforms/NaCl/PNaClABISimplify.cpp b/lib/Transforms/NaCl/PNaClABISimplify.cpp index c82575e0389..36949a2bfc8 100644 --- a/lib/Transforms/NaCl/PNaClABISimplify.cpp +++ b/lib/Transforms/NaCl/PNaClABISimplify.cpp @@ -173,8 +173,6 @@ void llvm::PNaClABISimplifyAddPostOptPasses(PassManagerBase &PM) { // ReplacePtrsWithInts leaves the lifetime.start/end intrinsics. PM.add(createStripDeadPrototypesPass()); - // Eliminate simple dead code that the post-opt passes could have - // created. - PM.add(createDeadInstEliminationPass()); + // Eliminate simple dead code that the post-opt passes could have created. PM.add(createDeadCodeEliminationPass()); } From 7afd1896f1d90b035fb774b54f7d369957d48753 Mon Sep 17 00:00:00 2001 From: Derek Schuff <dschuff@chromium.org> Date: Tue, 31 Mar 2015 14:11:00 -0700 Subject: [PATCH 15/16] Do not tailcall optimize vararg calls with 6 arguments TCrets use a reduced subset of registers because they cannot use callee-saves. When using sandbox base hiding (with r11 reserved) there are not enough registers because all of the available registers are used for arguments. 
R=jvoung@chromium.org BUG= https://code.google.com/p/nativeclient/issues/detail?id=4123 Review URL: https://codereview.chromium.org/1040233005 --- lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++++++++++++ test/CodeGen/X86/tailcall-64.ll | 9 +++++++++ 2 files changed, 27 insertions(+) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 88d7ffaf10f..ea24da756df 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17,6 +17,7 @@ #include "X86CallingConv.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" +#include "X86NaClDecls.h" // @LOCALMOD #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" #include "llvm/ADT/SmallBitVector.h" @@ -3404,6 +3405,23 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) if (!ArgLocs[i].isRegLoc()) return false; + // @LOCALMOD-BEGIN + // Do not optimize vararg calls with 6 arguments. TCrets use a reduced + // subset of registers because they cannot use callee-saves. When using + // sandbox base hiding (with r11 reserved) there are not enough registers + // because all of the available registers are used for arguments (%al is + // also used for the float/vector register count). + // Another option to fix this would be to replace r11 with r10 in the + // GR64_TC register class if we decide we don't care about supporting "nest" + // parameters. It might even be technically possible to actually use r11 + // because for tcreturn it's used as a jump target and not read, but I don't + // know if reserved regs can be set differently for different purposes like + // that. 
+ if (Subtarget->isTargetNaCl64() && + FlagHideSandboxBase && + ArgLocs.size() > 5) + return false; + // @LOCALMOD-END } // If the call result is in ST0 / ST1, it needs to be popped off the x87 diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll index deab1dcc7eb..e4e696073aa 100644 --- a/test/CodeGen/X86/tailcall-64.ll +++ b/test/CodeGen/X86/tailcall-64.ll @@ -1,4 +1,8 @@ ; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=core2 < %s | FileCheck %s +; @LOCALMOD +; TODO: NaCl tailcall "support" is a mess and has no tests. This only tests for +; Bug https://code.google.com/p/nativeclient/issues/detail?id=4123 +; RUN: llc -mtriple=x86_64-unknown-nacl -mcpu=core2 < %s | FileCheck %s --check-prefix=NACL declare i64 @testi() @@ -208,6 +212,10 @@ entry: ; ; CHECK-LABEL: rdar12282281 ; CHECK: jmpq *%r11 # TAILCALL + +; For NaCl, ensure we do not tail-call-optimize 6-arg functions +; NACL-LABEL: rdar12282281 +; NACL: naclcall @funcs = external constant [0 x i32 (i8*, ...)*] define i32 @rdar12282281(i32 %n) nounwind uwtable ssp { @@ -223,6 +231,7 @@ define x86_fp80 @fp80_call(x86_fp80 %x) nounwind { entry: ; CHECK-LABEL: fp80_call: ; CHECK: jmp _fp80_callee +; NACL-LABEL: fp80_call %call = tail call x86_fp80 @fp80_callee(x86_fp80 %x) nounwind ret x86_fp80 %call } From 91e76b38a9aebacd1f3eb80863346cfdd5f768a3 Mon Sep 17 00:00:00 2001 From: Alon Zakai <alonzakai@gmail.com> Date: Tue, 31 Mar 2015 17:10:29 -0700 Subject: [PATCH 16/16] 1.30.2 --- emscripten-version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emscripten-version.txt b/emscripten-version.txt index 816b2c7aa4f..5677c30f982 100644 --- a/emscripten-version.txt +++ b/emscripten-version.txt @@ -1,2 +1,2 @@ -1.30.1 +1.30.2