diff --git a/emscripten-version.txt b/emscripten-version.txt index bf2b5ccfefb..87684ab8378 100644 --- a/emscripten-version.txt +++ b/emscripten-version.txt @@ -1,2 +1,2 @@ -1.13.0 +1.13.1 diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index a7a1cc2b94f..ab5eaf027c5 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -1151,14 +1151,21 @@ bool ModuleLinker::run() { DstM->setTargetTriple(SrcM->getTargetTriple()); if (!SrcM->getDataLayout().empty() && !DstM->getDataLayout().empty() && - SrcM->getDataLayout() != DstM->getDataLayout()) - errs() << "WARNING: Linking two modules of different data layouts!\n"; + SrcM->getDataLayout() != DstM->getDataLayout()) { + // XXX EMSCRIPTEN: backport r203009 from LLVM trunk: + errs() << "WARNING: Linking two modules of different data layouts: '" + << SrcM->getModuleIdentifier() << "' is '" + << SrcM->getDataLayout() << "' whereas '" + << DstM->getModuleIdentifier() << "' is '" + << DstM->getDataLayout() << "'\n"; + } if (!SrcM->getTargetTriple().empty() && DstM->getTargetTriple() != SrcM->getTargetTriple()) { - errs() << "WARNING: Linking two modules of different target triples: "; - if (!SrcM->getModuleIdentifier().empty()) - errs() << SrcM->getModuleIdentifier() << ": "; - errs() << "'" << SrcM->getTargetTriple() << "' and '" + // XXX EMSCRIPTEN: backport r203009 from LLVM trunk: + errs() << "WARNING: Linking two modules of different target triples: '" + << SrcM->getModuleIdentifier() << "' is '" + << SrcM->getTargetTriple() << "' whereas '" + << DstM->getModuleIdentifier() << "' is '" << DstM->getTargetTriple() << "'\n"; } diff --git a/lib/Target/JSBackend/AllocaManager.cpp b/lib/Target/JSBackend/AllocaManager.cpp new file mode 100644 index 00000000000..b49e6c4de13 --- /dev/null +++ b/lib/Target/JSBackend/AllocaManager.cpp @@ -0,0 +1,527 @@ +//===-- AllocaManager.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the AllocaManager class. +// +// The AllocaManager computes a frame layout, assigning every static alloca an +// offset. It does alloca liveness analysis in order to reuse stack memory, +// using lifetime intrinsics. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "allocamanager" +#include "AllocaManager.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumAllocas, "Number of allocas eliminated"); + +// Return the size of the given alloca. +uint64_t AllocaManager::getSize(const AllocaInst *AI) { + assert(AI->isStaticAlloca()); + return DL->getTypeAllocSize(AI->getAllocatedType()) * + cast<ConstantInt>(AI->getArraySize())->getValue().getZExtValue(); +} + +// Return the alignment of the given alloca.
+unsigned AllocaManager::getAlignment(const AllocaInst *AI) { + assert(AI->isStaticAlloca()); + return std::max(AI->getAlignment(), + DL->getABITypeAlignment(AI->getAllocatedType())); +} + +AllocaManager::AllocaInfo AllocaManager::getInfo(const AllocaInst *AI) { + assert(AI->isStaticAlloca()); + return AllocaInfo(AI, getSize(AI), getAlignment(AI)); +} + +// Given a lifetime_start or lifetime_end intrinsic, determine if it's +// describing a static alloca memory region suitable for our analysis. If so, +// return the alloca, otherwise return NULL. +const AllocaInst * +AllocaManager::getAllocaFromIntrinsic(const CallInst *CI) { + const IntrinsicInst *II = cast<IntrinsicInst>(CI); + assert(II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end); + + // Lifetime intrinsics have a size as their first argument and a pointer as + // their second argument. + const Value *Size = II->getArgOperand(0); + const Value *Ptr = II->getArgOperand(1); + + // Check to see if we can convert the size to a host integer. If we can't, + // it's probably not worth worrying about. + const ConstantInt *SizeCon = dyn_cast<ConstantInt>(Size); + if (!SizeCon) return NULL; + const APInt &SizeAP = SizeCon->getValue(); + if (SizeAP.getActiveBits() > 64) return NULL; + uint64_t MarkedSize = SizeAP.getZExtValue(); + + // We're only interested if the pointer is a static alloca. + const AllocaInst *AI = dyn_cast<AllocaInst>(Ptr->stripPointerCasts()); + if (!AI || !AI->isStaticAlloca()) return NULL; + + // Make sure the size covers the alloca. + if (MarkedSize < getSize(AI)) return NULL; + + return AI; +} + +int AllocaManager::AllocaSort(const void *l, const void *r) { + const AllocaInfo *li = static_cast<const AllocaInfo *>(l); + const AllocaInfo *ri = static_cast<const AllocaInfo *>(r); + + // Sort by alignment to minimize padding. + if (li->getAlignment() > ri->getAlignment()) return -1; + if (li->getAlignment() < ri->getAlignment()) return 1; + + // Ensure a stable sort. We can do this because the pointers are + // pointing into the same array. + if (li > ri) return -1; + if (li < ri) return 1; + + return 0; +} + +// Collect allocas +void AllocaManager::collectMarkedAllocas() { + NamedRegionTimer Timer("Collect Marked Allocas", "AllocaManager", + TimePassesIsEnabled); + + // Weird semantics: If an alloca *ever* appears in a lifetime start or end + // within the same function, its lifetime begins only at the explicit lifetime + // starts and ends only at the explicit lifetime ends and function exit + // points. Otherwise, its lifetime begins in the entry block and it is live + // everywhere. + // + // And so, instead of just walking the entry block to find all the static + // allocas, we walk the whole body to find the intrinsics so we can find the + // set of static allocas referenced in the intrinsics. + for (Function::const_iterator FI = F->begin(), FE = F->end(); + FI != FE; ++FI) { + for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + const CallInst *CI = dyn_cast<CallInst>(BI); + if (!CI) continue; + + const Value *Callee = CI->getCalledValue(); + if (Callee == LifetimeStart || Callee == LifetimeEnd) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + Allocas.insert(std::make_pair(AI, 0)); + } + } + } + } + + // All that said, we still want the intrinsics in the order they appear in the + // block, so that we can represent later ones with earlier ones and skip + // worrying about dominance, so run through the entry block and index those + // allocas which we identified above.
+ AllocasByIndex.reserve(Allocas.size()); + const BasicBlock *EntryBB = &F->getEntryBlock(); + for (BasicBlock::const_iterator BI = EntryBB->begin(), BE = EntryBB->end(); + BI != BE; ++BI) { + const AllocaInst *AI = dyn_cast<AllocaInst>(BI); + if (!AI || !AI->isStaticAlloca()) continue; + + AllocaMap::iterator I = Allocas.find(AI); + if (I != Allocas.end()) { + I->second = AllocasByIndex.size(); + AllocasByIndex.push_back(getInfo(AI)); + } + } + assert(AllocasByIndex.size() == Allocas.size()); +} + +// Calculate the starting point from which inter-block liveness will be +// computed. +void AllocaManager::collectBlocks() { + NamedRegionTimer Timer("Collect Blocks", "AllocaManager", + TimePassesIsEnabled); + + size_t AllocaCount = AllocasByIndex.size(); + + BitVector Seen(AllocaCount); + + for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) { + const BasicBlock *BB = I; + + BlockLifetimeInfo &BLI = BlockLiveness[BB]; + BLI.Start.resize(AllocaCount); + BLI.End.resize(AllocaCount); + + // Track which allocas we've seen. This is used because if a lifetime start + // is the first lifetime marker for an alloca in a block, the alloca is + // live-in. + Seen.reset(); + + // Walk the instructions and compute the Start and End sets. + for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + const CallInst *CI = dyn_cast<CallInst>(BI); + if (!CI) continue; + + const Value *Callee = CI->getCalledValue(); + if (Callee == LifetimeStart) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + AllocaMap::const_iterator MI = Allocas.find(AI); + if (MI != Allocas.end()) { + size_t AllocaIndex = MI->second; + if (!Seen.test(AllocaIndex)) { + BLI.Start.set(AllocaIndex); + } + BLI.End.reset(AllocaIndex); + Seen.set(AllocaIndex); + } + } + } else if (Callee == LifetimeEnd) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + AllocaMap::const_iterator MI = Allocas.find(AI); + if (MI != Allocas.end()) { + size_t AllocaIndex = MI->second; + BLI.End.set(AllocaIndex); + Seen.set(AllocaIndex); + } + } + } + } + + // Lifetimes that start in this block and do not end here are live-out. + BLI.LiveOut = BLI.Start; + BLI.LiveOut.reset(BLI.End); + if (BLI.LiveOut.any()) { + for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + InterBlockWorklist.insert(*SI); + } + } + + // Lifetimes that end in this block and do not start here are live-in. + // TODO: Is this actually true? What are the semantics of a standalone + // lifetime end? See also the code in computeInterBlockLiveness. + BLI.LiveIn = BLI.End; + BLI.LiveIn.reset(BLI.Start); + if (BLI.LiveIn.any()) { + for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + InterBlockWorklist.insert(*PI); + } + } + } +} + +// Compute the LiveIn and LiveOut sets for each block in F. +void AllocaManager::computeInterBlockLiveness() { + NamedRegionTimer Timer("Compute inter-block liveness", "AllocaManager", + TimePassesIsEnabled); + + size_t AllocaCount = AllocasByIndex.size(); + + BitVector Temp(AllocaCount); + + // This is currently using a very simple-minded bi-directional liveness + // propagation algorithm. Numerous opportunities for compile time + // speedups here. + while (!InterBlockWorklist.empty()) { + const BasicBlock *BB = InterBlockWorklist.pop_back_val(); + BlockLifetimeInfo &BLI = BlockLiveness[BB]; + + // Compute the new live-in set.
+ for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + Temp |= BlockLiveness[*PI].LiveOut; + } + + // If it contains new live blocks, prepare to propagate them. + if (Temp.test(BLI.LiveIn)) { + BLI.LiveIn |= Temp; + BitVector LiveOut = BLI.LiveOut; + BLI.LiveOut |= Temp; + BLI.LiveOut.reset(BLI.End); + // If we actually added to live-out, re-process them + if (BLI.LiveOut.test(LiveOut)) { + for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + InterBlockWorklist.insert(*SI); + } + } + } + Temp.reset(); + + // Compute the new live-out set. + for (succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + Temp |= BlockLiveness[*SI].LiveIn; + } + + // If it contains new live blocks, prepare to propagate them. + if (Temp.test(BLI.LiveOut)) { + // TODO: As above, what are the semantics of a standalone lifetime end? + BLI.LiveOut |= Temp; + BitVector LiveIn = BLI.LiveIn; + BLI.LiveIn |= Temp; + BLI.LiveIn.reset(BLI.Start); + // If we actually added to live-in, re-process them + if (BLI.LiveIn.test(LiveIn)) { + for (const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { + InterBlockWorklist.insert(*PI); + } + } + } + Temp.reset(); + } +} + +// Determine overlapping live ranges within blocks. +void AllocaManager::computeIntraBlockLiveness() { + NamedRegionTimer Timer("Compute intra-block liveness", "AllocaManager", + TimePassesIsEnabled); + + size_t AllocaCount = AllocasByIndex.size(); + + BitVector Current(AllocaCount); + + AllocaCompatibility.resize(AllocaCount, BitVector(AllocaCount, true)); + + for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) { + const BasicBlock *BB = I; + const BlockLifetimeInfo &BLI = BlockLiveness[BB]; + + Current = BLI.LiveIn; + + for (int i = Current.find_first(); i >= 0; i = Current.find_next(i)) { + AllocaCompatibility[i].reset(Current); + } + + for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + const CallInst *CI = dyn_cast<CallInst>(BI); + if (!CI) continue; + + const Value *Callee = CI->getCalledValue(); + if (Callee == LifetimeStart) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + size_t AIndex = Allocas[AI]; + // We conflict with everything else that's currently live. + AllocaCompatibility[AIndex].reset(Current); + // Everything else that's currently live conflicts with us. + for (int i = Current.find_first(); i >= 0; i = Current.find_next(i)) { + AllocaCompatibility[i].reset(AIndex); + } + // We're now live. + Current.set(AIndex); + } + } else if (Callee == LifetimeEnd) { + if (const AllocaInst *AI = getAllocaFromIntrinsic(CI)) { + size_t AIndex = Allocas[AI]; + // We're no longer live. + Current.reset(AIndex); + } + } + } + } +} + +// Decide which allocas will represent which other allocas, and what their +// size and alignment will need to be. +void AllocaManager::computeRepresentatives() { + NamedRegionTimer Timer("Compute Representatives", "AllocaManager", + TimePassesIsEnabled); + + for (size_t i = 0, e = AllocasByIndex.size(); i != e; ++i) { + // If we've already represented this alloca with another, don't visit it. + if (AllocasByIndex[i].isForwarded()) continue; + if (i > size_t(INT_MAX)) continue; + + // Find compatible allocas. This is a simple greedy algorithm.
+ for (int j = int(i); ; ) { + assert(j >= int(i)); + j = AllocaCompatibility[i].find_next(j); + assert(j != int(i)); + if (j < 0) break; + if (!AllocaCompatibility[j][i]) continue; + + DEBUG(dbgs() << "Allocas: " + "Representing " + << AllocasByIndex[j].getInst()->getName() << " " + "with " + << AllocasByIndex[i].getInst()->getName() << "\n"); + ++NumAllocas; + + assert(!AllocasByIndex[j].isForwarded()); + + AllocasByIndex[i].mergeSize(AllocasByIndex[j].getSize()); + AllocasByIndex[i].mergeAlignment(AllocasByIndex[j].getAlignment()); + AllocasByIndex[j].forward(i); + + AllocaCompatibility[i] &= AllocaCompatibility[j]; + AllocaCompatibility[j].reset(); + } + } +} + +void AllocaManager::computeFrameOffsets() { + NamedRegionTimer Timer("Compute Frame Offsets", "AllocaManager", + TimePassesIsEnabled); + + // Walk through the entry block and collect all the allocas, including the + // ones with no lifetime markers that we haven't looked at yet. + const BasicBlock *EntryBB = &F->getEntryBlock(); + for (BasicBlock::const_iterator BI = EntryBB->begin(), BE = EntryBB->end(); + BI != BE; ++BI) { + const AllocaInst *AI = dyn_cast<AllocaInst>(BI); + if (!AI || !AI->isStaticAlloca()) continue; + + AllocaMap::const_iterator I = Allocas.find(AI); + if (I != Allocas.end()) { + // An alloca with lifetime markers. Emit the record we've crafted for it, + // if we've chosen to keep it as a representative. + const AllocaInfo &Info = AllocasByIndex[I->second]; + if (!Info.isForwarded()) { + SortedAllocas.push_back(Info); + } + } else { + // An alloca with no lifetime markers. + SortedAllocas.push_back(getInfo(AI)); + } + } + + // Sort the allocas to hopefully reduce padding. + array_pod_sort(SortedAllocas.begin(), SortedAllocas.end(), AllocaSort); + + // Assign stack offsets. + uint64_t CurrentOffset = 0; + for (SmallVectorImpl<AllocaInfo>::const_iterator I = SortedAllocas.begin(), + E = SortedAllocas.end(); I != E; ++I) { + const AllocaInfo &Info = *I; + uint64_t NewOffset = RoundUpToAlignment(CurrentOffset, Info.getAlignment()); + + // For backwards compatibility, align every power-of-two multiple alloca to + // its greatest power-of-two factor, up to 8 bytes. In particular, cube2hash + // is known to depend on this. + // TODO: Consider disabling this and making people fix their code. + if (uint64_t Size = Info.getSize()) { + uint64_t P2 = uint64_t(1) << CountTrailingZeros_64(Size); + unsigned CompatAlign = unsigned(std::min(P2, uint64_t(8))); + NewOffset = RoundUpToAlignment(NewOffset, CompatAlign); + } + + const AllocaInst *AI = Info.getInst(); + StaticAllocas[AI] = StaticAllocation(AI, NewOffset); + + CurrentOffset = NewOffset + Info.getSize(); + } + + // Add allocas that were represented by other allocas to the StaticAllocas map + // so that our clients can look them up. + for (unsigned i = 0, e = AllocasByIndex.size(); i != e; ++i) { + const AllocaInfo &Info = AllocasByIndex[i]; + if (!Info.isForwarded()) continue; + size_t j = Info.getForwardedID(); + assert(!AllocasByIndex[j].isForwarded()); + + StaticAllocaMap::const_iterator I = + StaticAllocas.find(AllocasByIndex[j].getInst()); + assert(I != StaticAllocas.end()); + + std::pair<StaticAllocaMap::iterator, bool> Pair = + StaticAllocas.insert(std::make_pair(AllocasByIndex[i].getInst(), + I->second)); + assert(Pair.second); (void)Pair; + } + + // Record the final frame size. Keep the stack pointer 16-byte aligned.
+ FrameSize = CurrentOffset; + FrameSize = RoundUpToAlignment(FrameSize, 16); + + DEBUG(dbgs() << "Allocas: " + "Statically allocated frame size is " << FrameSize << "\n"); +} + +AllocaManager::AllocaManager() { +} + +void AllocaManager::analyze(const Function &Func, const DataLayout &Layout, + bool PerformColoring) { + NamedRegionTimer Timer("AllocaManager", TimePassesIsEnabled); + assert(Allocas.empty()); + assert(AllocasByIndex.empty()); + assert(AllocaCompatibility.empty()); + assert(BlockLiveness.empty()); + assert(StaticAllocas.empty()); + assert(SortedAllocas.empty()); + + DL = &Layout; + F = &Func; + + // Get the declarations for the lifetime intrinsics so we can quickly test to + // see if they are used at all, and for use later if they are. + const Module *M = F->getParent(); + LifetimeStart = M->getFunction(Intrinsic::getName(Intrinsic::lifetime_start)); + LifetimeEnd = M->getFunction(Intrinsic::getName(Intrinsic::lifetime_end)); + + // If we are optimizing and the module contains any lifetime intrinsics, run + // the alloca coloring algorithm. + if (PerformColoring && + ((LifetimeStart && !LifetimeStart->use_empty()) || + (LifetimeEnd && !LifetimeEnd->use_empty()))) { + + collectMarkedAllocas(); + + if (!AllocasByIndex.empty()) { + DEBUG(dbgs() << "Allocas: " + << AllocasByIndex.size() << " marked allocas found\n"); + + collectBlocks(); + computeInterBlockLiveness(); + computeIntraBlockLiveness(); + BlockLiveness.clear(); + + computeRepresentatives(); + AllocaCompatibility.clear(); + } + } + + computeFrameOffsets(); + SortedAllocas.clear(); + Allocas.clear(); + AllocasByIndex.clear(); +} + +void AllocaManager::clear() { + StaticAllocas.clear(); +} + +bool +AllocaManager::getFrameOffset(const AllocaInst *AI, uint64_t *Offset) const { + assert(AI->isStaticAlloca()); + StaticAllocaMap::const_iterator I = StaticAllocas.find(AI); + assert(I != StaticAllocas.end()); + *Offset = I->second.Offset; + return AI == I->second.Representative; +} + +const AllocaInst * +AllocaManager::getRepresentative(const AllocaInst *AI) const { + assert(AI->isStaticAlloca()); + StaticAllocaMap::const_iterator I = StaticAllocas.find(AI); + assert(I != StaticAllocas.end()); + return I->second.Representative; +} diff --git a/lib/Target/JSBackend/AllocaManager.h b/lib/Target/JSBackend/AllocaManager.h new file mode 100644 index 00000000000..44b07981bc3 --- /dev/null +++ b/lib/Target/JSBackend/AllocaManager.h @@ -0,0 +1,172 @@ +//===-- AllocaManager.h ---------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass declares the AllocaManager class. +// +//===----------------------------------------------------------------------===// + +#ifndef JSBACKEND_ALLOCAMANAGER_H +#define JSBACKEND_ALLOCAMANAGER_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SetVector.h" + +namespace llvm { + +class AllocaInst; +class BasicBlock; +class CallInst; +class DataLayout; +class Function; + +/// Compute frame layout for allocas. +class AllocaManager { + const DataLayout *DL; + const Function *LifetimeStart; + const Function *LifetimeEnd; + const Function *F; + + // Per-block lifetime information. 
+ struct BlockLifetimeInfo { + BitVector Start; + BitVector End; + BitVector LiveIn; + BitVector LiveOut; + }; + typedef DenseMap<const BasicBlock *, BlockLifetimeInfo> LivenessMap; + LivenessMap BlockLiveness; + + // Worklist for inter-block liveness analysis. + typedef SmallSetVector<const BasicBlock *, 8> InterBlockWorklistVec; + InterBlockWorklistVec InterBlockWorklist; + + // Map allocas to their index in AllocasByIndex. + typedef DenseMap<const AllocaInst *, size_t> AllocaMap; + AllocaMap Allocas; + + // Information about an alloca. Note that the size and alignment may vary + // from what's in the actual AllocaInst when an alloca is also representing + // another with perhaps greater size and/or alignment needs. + // + // When an alloca is represented by another, its AllocaInfo is marked as + // "forwarded", at which point it no longer holds a size and alignment, but + // the index of the representative AllocaInfo. + class AllocaInfo { + const AllocaInst *Inst; + uint64_t Size; + unsigned Alignment; + + public: + AllocaInfo(const AllocaInst *I, uint64_t S, unsigned A) + : Inst(I), Size(S), Alignment(A) { + assert(I != NULL); + assert(A != 0); + assert(!isForwarded()); + } + + bool isForwarded() const { return Alignment == 0; } + + size_t getForwardedID() const { + assert(isForwarded()); + return static_cast<size_t>(Size); + } + + void forward(size_t i) { + assert(!isForwarded()); + Alignment = 0; + Size = i; + assert(isForwarded()); + assert(getForwardedID() == i); + } + + const AllocaInst *getInst() const { return Inst; } + + uint64_t getSize() const { assert(!isForwarded()); return Size; } + unsigned getAlignment() const { assert(!isForwarded()); return Alignment; } + + void mergeSize(uint64_t S) { + assert(!isForwarded()); + Size = std::max(Size, S); + assert(!isForwarded()); + } + void mergeAlignment(unsigned A) { + assert(A != 0); + assert(!isForwarded()); + Alignment = std::max(Alignment, A); + assert(!isForwarded()); + } + }; + typedef SmallVector<AllocaInfo, 8> AllocaVec; + AllocaVec AllocasByIndex; + + // For each alloca, which allocas can it safely represent? Allocas are + // identified by AllocasByIndex index. + // TODO: Vector-of-vectors isn't the fastest data structure possible here. + typedef SmallVector<BitVector, 8> AllocaCompatibilityVec; + AllocaCompatibilityVec AllocaCompatibility; + + // This is for allocas that will eventually be sorted. + SmallVector<AllocaInfo, 8> SortedAllocas; + + // Static allocation results. + struct StaticAllocation { + const AllocaInst *Representative; + uint64_t Offset; + StaticAllocation() {} + StaticAllocation(const AllocaInst *A, uint64_t O) + : Representative(A), Offset(O) {} + }; + typedef DenseMap<const AllocaInst *, StaticAllocation> StaticAllocaMap; + StaticAllocaMap StaticAllocas; + uint64_t FrameSize; + + uint64_t getSize(const AllocaInst *AI); + unsigned getAlignment(const AllocaInst *AI); + AllocaInfo getInfo(const AllocaInst *AI); + const AllocaInst *getAllocaFromIntrinsic(const CallInst *CI); + static int AllocaSort(const void *l, const void *r); + + void collectMarkedAllocas(); + void collectBlocks(); + void computeInterBlockLiveness(); + void computeIntraBlockLiveness(); + void computeRepresentatives(); + void computeFrameOffsets(); + +public: + AllocaManager(); + + /// Analyze the given function and prepare for getRepresentative queries. + void analyze(const Function &Func, const DataLayout &Layout, + bool PerformColoring); + + /// Reset all stored state. + void clear(); + + /// Return the representative alloca for the given alloca. When allocas are + /// merged, one is chosen as the representative to stand for the rest.
+ /// References to the alloca should take the form of references to the + /// representative. + const AllocaInst *getRepresentative(const AllocaInst *AI) const; + + /// Set *offset to the frame offset for the given alloca. Return true if the + /// given alloca is representative, meaning that it needs an explicit + /// definition in the function entry. Return false if some other alloca + /// represents this one. + bool getFrameOffset(const AllocaInst *AI, uint64_t *offset) const; + + /// Return the total frame size for all static allocas and associated padding. + uint64_t getFrameSize() const { return FrameSize; } +}; + +} // namespace llvm + +#endif diff --git a/lib/Target/JSBackend/CMakeLists.txt b/lib/Target/JSBackend/CMakeLists.txt index 37704f316fd..24622e1c3f2 100644 --- a/lib/Target/JSBackend/CMakeLists.txt +++ b/lib/Target/JSBackend/CMakeLists.txt @@ -1,8 +1,13 @@ add_llvm_target(JSBackendCodeGen + AllocaManager.cpp ExpandI64.cpp JSBackend.cpp + JSTargetMachine.cpp Relooper.cpp SimplifyAllocas.cpp ) +add_dependencies(LLVMJSBackendCodeGen intrinsics_gen) + add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/JSBackend/CallHandlers.h b/lib/Target/JSBackend/CallHandlers.h index 93275b9637f..8ae8c26c1d8 100644 --- a/lib/Target/JSBackend/CallHandlers.h +++ b/lib/Target/JSBackend/CallHandlers.h @@ -5,7 +5,7 @@ typedef std::string (JSWriter::*CallHandler)(const Instruction*, std::string Name, int NumArgs); typedef std::map<std::string, CallHandler> CallHandlerMap; -CallHandlerMap *CallHandlers; +CallHandlerMap CallHandlers; // Definitions @@ -44,6 +44,36 @@ DEF_CALL_HANDLER(__default__, { if (F) { NeedCasts = F->isDeclaration(); // if ffi call, need casts FT = F->getFunctionType(); + if (EmscriptenAssertions) { + if (!FT->isVarArg()) { + unsigned TypeNumArgs = FT->getNumParams(); + unsigned ActualNumArgs = getNumArgOperands(CI); + if (TypeNumArgs != ActualNumArgs) { + errs().changeColor(raw_ostream::YELLOW); + errs() << "warning:"; + errs().resetColor(); + errs() << " unexpected number of arguments " << utostr(ActualNumArgs) << " in call to '" << F->getName() << "', should be " << utostr(TypeNumArgs) << "\n"; + } + for (unsigned i = 0; i < std::min(TypeNumArgs, ActualNumArgs); i++) { + Type *TypeType = FT->getParamType(i); + Type *ActualType = CI->getOperand(i)->getType(); + if (getFunctionSignatureLetter(TypeType) != getFunctionSignatureLetter(ActualType)) { + errs().changeColor(raw_ostream::YELLOW); + errs() << "warning:"; + errs().resetColor(); + errs() << " unexpected argument type " << *ActualType << " at index " << utostr(i) << " in call to '" << F->getName() << "', should be " << *TypeType << "\n"; + } + } + } + Type *TypeType = FT->getReturnType(); + Type *ActualType = CI->getType(); + if (getFunctionSignatureLetter(TypeType) != getFunctionSignatureLetter(ActualType)) { + errs().changeColor(raw_ostream::YELLOW); + errs() << "warning:"; + errs().resetColor(); + errs() << " unexpected return type " << *ActualType << " in call to '" << F->getName() << "', should be " << *TypeType << "\n"; + } + } } else { if (isAbsolute(CV)) return "abort(); /* segfault, call an absolute addr */"; // function pointer call @@ -71,8 +101,14 @@ } if (NumArgs > 0) text += ","; } - // this is an ffi call if we need casts, and it is not a Math_ builtin (with just 1 arg - Math with more args is different XXX) - bool FFI = NeedCasts && (NumArgs > 1 || Name.find("Math_") != 0); + // this is an ffi call if we need casts, and it is not a special Math_ builtin + bool FFI =
NeedCasts; + if (FFI && Name.find("Math_") == 0) { + if (Name == "Math_ceil" || Name == "Math_floor" || Name == "Math_min" || Name == "Math_max" || Name == "Math_sqrt" || Name == "Math_abs") { + // This special Math builtin is optimizable with all types, including floats, so we can treat it as non-ffi + FFI = false; + } + } unsigned FFI_OUT = FFI ? ASM_FFI_OUT : 0; for (int i = 0; i < NumArgs; i++) { if (!NeedCasts) { @@ -153,6 +189,12 @@ DEF_CALL_HANDLER(emscripten_get_longjmp_result, { return getAssign(CI) + "tempRet0"; }) +// emscripten intrinsics +DEF_CALL_HANDLER(emscripten_debugger, { + CanValidate = false; + return "debugger"; +}) + // i64 support DEF_CALL_HANDLER(getHigh32, { @@ -221,8 +263,6 @@ DEF_CALL_HANDLER(llvm_nacl_atomic_store_i32, { #define WRITE_LOOP_MAX 128 DEF_CALL_HANDLER(llvm_memcpy_p0i8_p0i8_i32, { - Declares.insert("memcpy"); - Redirects["llvm_memcpy_p0i8_p0i8_i32"] = "memcpy"; if (CI) { ConstantInt *AlignInt = dyn_cast<ConstantInt>(CI->getOperand(3)); if (AlignInt) { @@ -254,7 +294,7 @@ } else { // emit a loop UsedVars["dest"] = UsedVars["src"] = UsedVars["stop"] = Type::getInt32Ty(TheModule->getContext())->getTypeID(); - Ret += "dest=" + Dest + "+" + utostr(Pos) + "|0; src=" + Src + "+" + utostr(Pos) + "|0; stop=dest+" + utostr(CurrLen) + "|0; do { " + getHeapAccess("dest", Align) + "=" + getHeapAccess("src", Align) + "|0; dest=dest+" + utostr(Align) + "|0; src=src+" + utostr(Align) + "|0; } while ((dest|0) < (stop|0));"; + Ret += "dest=" + Dest + "+" + utostr(Pos) + "|0; src=" + Src + "+" + utostr(Pos) + "|0; stop=dest+" + utostr(CurrLen) + "|0; do { " + getHeapAccess("dest", Align) + "=" + getHeapAccess("src", Align) + "|0; dest=dest+" + utostr(Align) + "|0; src=src+" + utostr(Align) + "|0; } while ((dest|0) < (stop|0))"; } Pos += CurrLen; Len -= CurrLen; @@ -265,12 +305,11 @@ } } } + Declares.insert("memcpy"); return CH___default__(CI, "_memcpy", 3) + "|0"; }) DEF_CALL_HANDLER(llvm_memset_p0i8_i32, { - Declares.insert("memset"); - Redirects["llvm_memset_p0i8_i32"] = "memset"; if (CI) { ConstantInt *AlignInt = dyn_cast<ConstantInt>(CI->getOperand(3)); if (AlignInt) { @@ -309,7 +348,7 @@ } else { // emit a loop UsedVars["dest"] = UsedVars["stop"] = Type::getInt32Ty(TheModule->getContext())->getTypeID(); - Ret += "dest=" + Dest + "+" + utostr(Pos) + "|0; stop=dest+" + utostr(CurrLen) + "|0; do { " + getHeapAccess("dest", Align) + "=" + utostr(FullVal) + "|0; dest=dest+" + utostr(Align) + "|0; } while ((dest|0) < (stop|0));"; + Ret += "dest=" + Dest + "+" + utostr(Pos) + "|0; stop=dest+" + utostr(CurrLen) + "|0; do { " + getHeapAccess("dest", Align) + "=" + utostr(FullVal) + "|0; dest=dest+" + utostr(Align) + "|0; } while ((dest|0) < (stop|0))"; } Pos += CurrLen; Len -= CurrLen; @@ -321,12 +360,12 @@ } } } + Declares.insert("memset"); return CH___default__(CI, "_memset", 3) + "|0"; }) DEF_CALL_HANDLER(llvm_memmove_p0i8_p0i8_i32, { Declares.insert("memmove"); - Redirects["llvm_memmove_p0i8_p0i8_i32"] = "memmove"; return CH___default__(CI, "_memmove", 3) + "|0"; }) @@ -450,13 +489,11 @@ DEF_CALL_HANDLER(name, { \ /* FIXME: do not redirect if this is implemented and not just a declare!
*/ \ Declares.insert(#to); \ Redirects[#name] = #to; \ - if (!CI) return ""; \ return CH___default__(CI, "_" #to); \ }) #define DEF_BUILTIN_HANDLER(name, to) \ DEF_CALL_HANDLER(name, { \ - if (!CI) return ""; \ return CH___default__(CI, #to); \ }) @@ -702,9 +739,9 @@ DEF_REDIRECT_HANDLER(SDL_RWFromMem, SDL_RWFromConstMem); // Setups void setupCallHandlers() { - CallHandlers = new CallHandlerMap; + assert(CallHandlers.empty()); #define SETUP_CALL_HANDLER(Ident) \ - (*CallHandlers)["_" #Ident] = &JSWriter::CH_##Ident; + CallHandlers["_" #Ident] = &JSWriter::CH_##Ident; SETUP_CALL_HANDLER(__default__); SETUP_CALL_HANDLER(emscripten_preinvoke); @@ -716,6 +753,7 @@ SETUP_CALL_HANDLER(emscripten_longjmp); SETUP_CALL_HANDLER(emscripten_check_longjmp); SETUP_CALL_HANDLER(emscripten_get_longjmp_result); + SETUP_CALL_HANDLER(emscripten_debugger); SETUP_CALL_HANDLER(getHigh32); SETUP_CALL_HANDLER(setHigh32); SETUP_CALL_HANDLER(FtoILow); @@ -1006,7 +1044,9 @@ std::string handleCall(const Instruction *CI) { const Value *CV = getActuallyCalledValue(CI); - assert(!isa<InlineAsm>(CV) && "asm() not supported, use EM_ASM() (see emscripten.h)"); + if (isa<InlineAsm>(CV)) { + report_fatal_error("asm() not supported, use EM_ASM() (see emscripten.h)"); + } // Get the name to call this function by. If it's a direct call, meaning // we know which Function we're calling, avoid calling getValueAsStr, as @@ -1014,10 +1054,10 @@ const std::string &Name = isa<Function>(CV) ? getJSName(CV) : getValueAsStr(CV); unsigned NumArgs = getNumArgOperands(CI); - CallHandlerMap::iterator CH = CallHandlers->find("___default__"); + CallHandlerMap::iterator CH = CallHandlers.find("___default__"); if (isa<Function>(CV)) { - CallHandlerMap::iterator Custom = CallHandlers->find(Name); - if (Custom != CallHandlers->end()) CH = Custom; + CallHandlerMap::iterator Custom = CallHandlers.find(Name); + if (Custom != CallHandlers.end()) CH = Custom; } return (this->*(CH->second))(CI, Name, NumArgs); } diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp index 8a811b69589..d4fbdad1ee7 100644 --- a/lib/Target/JSBackend/JSBackend.cpp +++ b/lib/Target/JSBackend/JSBackend.cpp @@ -15,13 +15,14 @@ //===----------------------------------------------------------------------===// #include "JSTargetMachine.h" +#include "MCTargetDesc/JSBackendMCTargetDesc.h" +#include "AllocaManager.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Config/config.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InlineAsm.h" @@ -30,9 +31,6 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" #include "llvm/Support/CallSite.h" @@ -40,6 +38,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/DebugInfo.h" #include @@ -51,10 +50,6 @@ using namespace llvm; #include #include -#ifdef _MSC_VER -#define snprintf _snprintf -#endif - #ifdef NDEBUG #undef assert #define assert(x) { if (!(x))
report_fatal_error(#x); } @@ -75,6 +70,11 @@ ReservedFunctionPointers("emscripten-reserved-function-pointers", cl::desc("Number of reserved slots in function tables for functions to be added at runtime (see emscripten RESERVED_FUNCTION_POINTERS option)"), cl::init(0)); +static cl::opt +EmscriptenAssertions("emscripten-assertions", + cl::desc("Additional JS-specific assertions (see emscripten ASSERTIONS)"), + cl::init(0)); + extern "C" void LLVMInitializeJSBackendTarget() { // Register the target. RegisterTargetMachine X(TheJSBackendTarget); @@ -96,7 +96,6 @@ namespace { typedef std::vector HeapData; typedef std::pair Address; typedef std::map VarMap; - typedef std::map AllocaIntMap; typedef std::map GlobalAddressMap; typedef std::vector FunctionTable; typedef std::map FunctionTableMap; @@ -114,8 +113,7 @@ namespace { unsigned UniqueNum; ValueMap ValueNames; VarMap UsedVars; - AllocaIntMap StackAllocs; - unsigned TotalStackAllocs; + AllocaManager Allocas; HeapData GlobalData8; HeapData GlobalData32; HeapData GlobalData64; @@ -131,15 +129,19 @@ namespace { std::vector Exports; // additional exports BlockAddressMap BlockAddresses; + bool CanValidate; bool UsesSIMD; int InvokeState; // cycles between 0, 1 after preInvoke, 2 after call, 0 again after postInvoke. hackish, no argument there. + CodeGenOpt::Level OptLevel; DataLayout *DL; #include "CallHandlers.h" public: static char ID; - explicit JSWriter(formatted_raw_ostream &o) : ModulePass(ID), Out(o), UniqueNum(0), UsesSIMD(false), InvokeState(0) {} + JSWriter(formatted_raw_ostream &o, CodeGenOpt::Level OptLevel) + : ModulePass(ID), Out(o), UniqueNum(0), CanValidate(true), UsesSIMD(false), InvokeState(0), + OptLevel(OptLevel) {} virtual const char *getPassName() const { return "JavaScript backend"; } @@ -167,12 +169,14 @@ namespace { #define MEM_ALIGN 8 #define MEM_ALIGN_BITS 64 + #define STACK_ALIGN 16 + #define STACK_ALIGN_BITS 128 - unsigned memAlign(unsigned x) { - return x + (x%MEM_ALIGN != 0 ? MEM_ALIGN - x%MEM_ALIGN : 0); + unsigned stackAlign(unsigned x) { + return RoundUpToAlignment(x, STACK_ALIGN); } - std::string memAlignStr(std::string x) { - return "((" + x + "+" + utostr(MEM_ALIGN-1) + ")&-" + utostr(MEM_ALIGN) + ")"; + std::string stackAlignStr(std::string x) { + return "((" + x + "+" + utostr(STACK_ALIGN-1) + ")&-" + utostr(STACK_ALIGN) + ")"; } HeapData *allocateAddress(const std::string& Name, unsigned Bits = MEM_ALIGN_BITS) { @@ -258,14 +262,6 @@ namespace { } else return 'i'; } std::string getFunctionSignature(const FunctionType *F, const std::string *Name=NULL) { - if (Name) { - // special-case some function signatures, because of how we emit code for them FIXME this is hackish - if (*Name == "_llvm_memcpy_p0i8_p0i8_i32" || *Name == "_memcpy" || - *Name == "_llvm_memset_p0i8_i32" || *Name == "_memset" || - *Name == "_llvm_memmove_p0i8_p0i8_i32" || *Name == "_memmove") { - return "iiii"; - } - } std::string Ret; Ret += getFunctionSignatureLetter(F->getReturnType()); for (FunctionType::param_iterator AI = F->param_begin(), @@ -294,8 +290,8 @@ namespace { IndexedFunctions[Name] = Index; // invoke the callHandler for this, if there is one. 
the function may only be indexed but never called directly, and we may need to do things in the handler - CallHandlerMap::const_iterator CH = CallHandlers->find(Name); - if (CH != CallHandlers->end()) { + CallHandlerMap::const_iterator CH = CallHandlers.find(Name); + if (CH != CallHandlers.end()) { (this->*(CH->second))(NULL, Name, -1); } @@ -314,9 +310,26 @@ return getBlockAddress(BA->getFunction(), BA->getBasicBlock()); } + const Value *resolveFully(const Value *V) { + bool More = true; + while (More) { + More = false; + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + V = GA->getAliasee(); + More = true; + } + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + V = CE->getOperand(0); // ignore bitcasts + More = true; + } + } + return V; + } + // Return a constant we are about to write into a global as a numeric offset. If the // value is not known at compile time, emit a postSet to that location. unsigned getConstAsOffset(const Value *V, unsigned AbsoluteTarget) { + V = resolveFully(V); if (const Function *F = dyn_cast<Function>(V)) { return getFunctionIndex(F); } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(V)) { @@ -352,6 +365,7 @@ assert(VT->getElementType()->getPrimitiveSizeInBits() == 32); assert(VT->getNumElements() == 4); UsesSIMD = true; + CanValidate = false; } std::string getPtrLoad(const Value* Ptr); @@ -422,6 +436,10 @@ static inline char halfCharToHex(unsigned char half) { } static inline void sanitizeGlobal(std::string& str) { + // Global names are prefixed with "_" to prevent them from colliding with + // names of things in normal JS. + str = "_" + str; + // functions and globals should already be in C-style format, // in addition to . for llvm intrinsics and possibly $ and so forth. // There is a risk of collisions here, we just lower all these @@ -435,6 +453,10 @@ } static inline void sanitizeLocal(std::string& str) { + // Local names are prefixed with "$" to prevent them from colliding with + // global names. + str = "$" + str; + // We need to convert every string that is not a valid JS identifier into // a valid one, without collisions - we cannot turn "x.a" into "x_a" while // also leaving "x_a" as is, for example. @@ -510,7 +532,10 @@ std::string JSWriter::getPhiCode(const BasicBlock *From, const BasicBlock *To) { // we found it const std::string &name = getJSName(P); assigns[name] = getAssign(P); - const Value *V = P->getIncomingValue(index); + // Get the operand, and strip pointer casts, since normal expression + // translation also strips pointer casts, and we want to see the same + // thing so that we can detect any resulting dependencies. + const Value *V = P->getIncomingValue(index)->stripPointerCasts(); values[name] = V; std::string vname = getValueAsStr(V); if (const Instruction *VI = dyn_cast<Instruction>(V)) { @@ -556,18 +581,29 @@ const std::string &JSWriter::getJSName(const Value* val) { if (I != ValueNames.end() && I->first == val) return I->second; + // If this is an alloca we've replaced with another, use the other name.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(val)) { + if (AI->isStaticAlloca()) { + const AllocaInst *Rep = Allocas.getRepresentative(AI); + if (Rep != AI) { + return getJSName(Rep); + } + } + } + std::string name; if (val->hasName()) { - if (isa<GlobalVariable>(val) || isa<Function>(val)) { - name = std::string("_") + val->getName().str(); - sanitizeGlobal(name); - } else { - name = std::string("$") + val->getName().str(); - sanitizeLocal(name); - } + name = val->getName().str(); } else { - name = "u$" + utostr(UniqueNum++); + name = utostr(UniqueNum++); } + + if (isa<GlobalValue>(val)) { + sanitizeGlobal(name); + } else { + sanitizeLocal(name); + } + return ValueNames[val] = name; } @@ -931,6 +967,11 @@ std::string JSWriter::getConstant(const Constant* CV, AsmCast sign) { Externals.insert(Name); return Name; } + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(CV)) { + // Since we don't currently support linking of our output, we don't need + // to worry about weak or other kinds of aliases. + return getConstant(GA->getAliasee(), sign); + } return utostr(getGlobalAddress(GV->getName().str())); } @@ -998,6 +1039,9 @@ } std::string JSWriter::getValueAsStr(const Value* V, AsmCast sign) { + // Skip past no-op bitcasts and zero-index geps. + V = V->stripPointerCasts(); + if (const Constant *CV = dyn_cast<Constant>(V)) { return getConstant(CV, sign); } else { @@ -1006,6 +1050,9 @@ } std::string JSWriter::getValueAsCastStr(const Value* V, AsmCast sign) { + // Skip past no-op bitcasts and zero-index geps. + V = V->stripPointerCasts(); + if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) { return getConstant(cast<Constant>(V), sign); } else { @@ -1014,6 +1061,9 @@ } std::string JSWriter::getValueAsParenStr(const Value* V) { + // Skip past no-op bitcasts and zero-index geps. + V = V->stripPointerCasts(); + if (const Constant *CV = dyn_cast<Constant>(V)) { return getConstant(CV); } else { @@ -1022,6 +1072,9 @@ } std::string JSWriter::getValueAsCastParenStr(const Value* V, AsmCast sign) { + // Skip past no-op bitcasts and zero-index geps. + V = V->stripPointerCasts(); + if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) { return getConstant(cast<Constant>(V), sign); } else { @@ -1154,6 +1207,12 @@ static uint64_t LSBMask(unsigned numBits) { // Generate code for any operator, either an Instruction or a ConstantExpr. void JSWriter::generateExpression(const User *I, raw_string_ostream& Code) { + // To avoid emitting code and variables for the no-op pointer bitcasts + // and all-zero-index geps that LLVM needs to satisfy its type system, we + // call stripPointerCasts() on all values before translating them. This + // includes bitcasts whose only use is lifetime marker intrinsics.
+ assert(I == I->stripPointerCasts()); + Type *T = I->getType(); if (T->isIntegerTy() && T->getIntegerBitWidth() > 32) { errs() << *I << "\n"; @@ -1341,30 +1400,39 @@ break; } case Instruction::Alloca: { - if (NativizedVars.count(I)) { + const AllocaInst* AI = cast<AllocaInst>(I); + + if (NativizedVars.count(AI)) { // nativized stack variable, we just need a 'var' definition - UsedVars[getJSName(I)] = cast<PointerType>(I->getType())->getElementType()->getTypeID(); + UsedVars[getJSName(AI)] = AI->getType()->getElementType()->getTypeID(); return; } - const AllocaInst* AI = cast<AllocaInst>(I); - AllocaIntMap::iterator AIMI = StackAllocs.find(AI); - if (AIMI != StackAllocs.end()) { - // fixed-size allocation that is already taken into account in the big initial allocation - if (AIMI->second) { - Code << getAssign(AI) << "sp + " << utostr(AIMI->second) << "|0"; - } else { - Code << getAssign(AI) << "sp"; + + // Fixed-size entry-block allocations are allocated all at once in the + // function prologue. + if (AI->isStaticAlloca()) { + uint64_t Offset; + if (Allocas.getFrameOffset(AI, &Offset)) { + if (Offset != 0) { + Code << getAssign(AI) << "sp + " << Offset << "|0"; + } else { + Code << getAssign(AI) << "sp"; + } + break; } - break; + // Otherwise, this alloca is being represented by another alloca, so + // there's nothing to print. + return; } + Type *T = AI->getAllocatedType(); std::string Size; uint64_t BaseSize = DL->getTypeAllocSize(T); const Value *AS = AI->getArraySize(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(AS)) { - Size = Twine(memAlign(BaseSize * CI->getZExtValue())).str(); + Size = Twine(stackAlign(BaseSize * CI->getZExtValue())).str(); } else { - Size = memAlignStr("((" + utostr(BaseSize) + '*' + getValueAsStr(AS) + ")|0)"); + Size = stackAlignStr("((" + utostr(BaseSize) + '*' + getValueAsStr(AS) + ")|0)"); } Code << getAssign(AI) << "STACKTOP; STACKTOP = STACKTOP + " << Size << "|0"; break; @@ -1463,7 +1531,7 @@ case Instruction::ZExt: Code << getValueAsCastStr(I->getOperand(0), ASM_UNSIGNED); break; case Instruction::FPExt: { if (PreciseF32) { - Code << "+" + getValueAsStr(I->getOperand(0)); break; + Code << "+" << getValueAsStr(I->getOperand(0)); break; } else { Code << getValueAsStr(I->getOperand(0)); break; } @@ -1491,10 +1559,10 @@ std::string V = getValueAsStr(I->getOperand(0)); if (InType->isIntegerTy() && OutType->isFloatingPointTy()) { assert(InType->getIntegerBitWidth() == 32); - Code << "(HEAP32[tempDoublePtr>>2]=" << V << "," << getCast("HEAPF32[tempDoublePtr>>2]", Type::getFloatTy(TheModule->getContext())) + ")"; + Code << "(HEAP32[tempDoublePtr>>2]=" << V << "," << getCast("HEAPF32[tempDoublePtr>>2]", Type::getFloatTy(TheModule->getContext())) << ")"; } else if (OutType->isIntegerTy() && InType->isFloatingPointTy()) { assert(OutType->getIntegerBitWidth() == 32); - Code << "(HEAPF32[tempDoublePtr>>2]=" << V << "," << "HEAP32[tempDoublePtr>>2]|0)"; + Code << "(HEAPF32[tempDoublePtr>>2]=" << V << "," "HEAP32[tempDoublePtr>>2]|0)"; } else { Code << V; } @@ -1586,7 +1654,9 @@ void JSWriter::addBlock(const BasicBlock *BB, Relooper& R, LLVMToRelooperMap& LL raw_string_ostream CodeStream(Code); for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - generateExpression(I, CodeStream); + if (I->stripPointerCasts() == I) { + generateExpression(I, CodeStream); +
} } CodeStream.flush(); const Value* Condition = considerConditionVar(BB->getTerminator()); @@ -1746,8 +1816,8 @@ void JSWriter::printFunctionBody(const Function *F) { // Emit stack entry Out << " " << getAdHocAssign("sp", Type::getInt32Ty(F->getContext())) << "STACKTOP;"; - if (TotalStackAllocs) { - Out << "\n " << "STACKTOP = STACKTOP + " + utostr(TotalStackAllocs) + "|0;"; + if (uint64_t FrameSize = Allocas.getFrameSize()) { + Out << "\n " "STACKTOP = STACKTOP + " << FrameSize << "|0;"; } // Emit (relooped) code @@ -1786,59 +1856,24 @@ void JSWriter::processConstants() { void JSWriter::printFunction(const Function *F) { ValueNames.clear(); - // Ensure all arguments and locals are named (we assume used values need names, which might be false if the optimizer did not run) - unsigned Next = 1; - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) { - if (!AI->hasName() && !AI->use_empty()) { - ValueNames[AI] = "$" + utostr(Next++); - } - } - for (Function::const_iterator BI = F->begin(), BE = F->end(); - BI != BE; ++BI) { - for (BasicBlock::const_iterator II = BI->begin(), E = BI->end(); - II != E; ++II) { - if (!II->hasName() && !II->use_empty()) { - ValueNames[II] = "$" + utostr(Next++); - } - } - } - // Prepare and analyze function UsedVars.clear(); UniqueNum = 0; - calculateNativizedVars(F); - StackAllocs.clear(); - TotalStackAllocs = 0; + // When optimizing, the regular optimizer (mem2reg, SROA, GVN, and others) + // will have already taken all the opportunities for nativization. + if (OptLevel == CodeGenOpt::None) + calculateNativizedVars(F); - for (Function::const_iterator BI = F->begin(), BE = F->end(); BI != BE; ++BI) { - for (BasicBlock::const_iterator II = BI->begin(), E = BI->end(); II != E; ++II) { - if (const AllocaInst* AI = dyn_cast(II)) { - Type *T = AI->getAllocatedType(); - const Value *AS = AI->getArraySize(); - unsigned BaseSize = DL->getTypeAllocSize(T); - if (const ConstantInt *CI = dyn_cast(AS)) { - // TODO: group by alignment to avoid unnecessary padding - unsigned Size = memAlign(BaseSize * CI->getZExtValue()); - StackAllocs[AI] = TotalStackAllocs; - TotalStackAllocs += Size; - } - } else { - // stop after the first non-alloca - could alter the stack - // however, ptrtoints are ok, and the legalizaton passes introduce them - if (!isa(II)) break; - } - } - break; - } + // Do alloca coloring at -O1 and higher. + Allocas.analyze(*F, *DL, OptLevel != CodeGenOpt::None); // Emit the function std::string Name = F->getName(); sanitizeGlobal(Name); - Out << "function _" << Name << "("; + Out << "function " << Name << "("; for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI) { if (AI != F->arg_begin()) Out << ","; @@ -1855,6 +1890,8 @@ void JSWriter::printFunction(const Function *F) { printFunctionBody(F); Out << "}"; nl(Out); + + Allocas.clear(); } void JSWriter::printModuleBody() { @@ -1864,21 +1901,6 @@ void JSWriter::printModuleBody() { nl(Out) << "// EMSCRIPTEN_START_FUNCTIONS"; nl(Out); for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); I != E; ++I) { - // Ignore intrinsics that are always no-ops. We don't emit any code for - // them, so we don't need to declare them. 
- if (I->isIntrinsic()) { - switch (I->getIntrinsicID()) { - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::prefetch: - continue; - } - } - if (!I->isDeclaration()) printFunction(I); } Out << "function runPostSets() {\n"; @@ -1911,6 +1933,26 @@ void JSWriter::printModuleBody() { for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); I != E; ++I) { if (I->isDeclaration() && !I->use_empty()) { + // Ignore intrinsics that are always no-ops or expanded into other code + // which doesn't require the intrinsic function itself to be declared. + if (I->isIntrinsic()) { + switch (I->getIntrinsicID()) { + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::prefetch: + case Intrinsic::memcpy: + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::expect: + case Intrinsic::flt_rounds: + continue; + } + } + if (first) { first = false; } else { @@ -2014,6 +2056,10 @@ void JSWriter::printModuleBody() { } Out << "],"; + Out << "\"canValidate\": "; + Out << (CanValidate ? "1" : "0"); + Out << ","; + Out << "\"simd\": "; Out << (UsesSIMD ? "1" : "0"); Out << ","; @@ -2089,8 +2135,6 @@ void JSWriter::parseConstant(const std::string& name, const Constant* CV, bool c } else if (isa(CV)) { if (calculate) { unsigned Bytes = DL->getTypeStoreSize(CV->getType()); - // FIXME: assume full 64-bit alignment for now - Bytes = memAlign(Bytes); HeapData *GlobalData = allocateAddress(name); for (unsigned i = 0; i < Bytes; ++i) { GlobalData->push_back(0); @@ -2319,9 +2363,16 @@ bool JSTargetMachine::addPassesToEmitFile(PassManagerBase &PM, assert(FileType == TargetMachine::CGFT_AssemblyFile); PM.add(createExpandI64Pass()); - PM.add(createSimplifyAllocasPass()); - PM.add(new JSWriter(o)); + + CodeGenOpt::Level OptLevel = getOptLevel(); + + // When optimizing, there shouldn't be any opportunities for SimplifyAllocas + // because the regular optimizer should have taken them all (GVN, and possibly + // also SROA). 
+ if (OptLevel == CodeGenOpt::None) + PM.add(createSimplifyAllocasPass()); + + PM.add(new JSWriter(o, OptLevel)); return false; } - diff --git a/lib/Target/JSBackend/JSTargetMachine.cpp b/lib/Target/JSBackend/JSTargetMachine.cpp new file mode 100644 index 00000000000..af428f2eb2d --- /dev/null +++ b/lib/Target/JSBackend/JSTargetMachine.cpp @@ -0,0 +1,14 @@ +#include "JSTargetMachine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/PassManager.h" +using namespace llvm; + +JSTargetMachine::JSTargetMachine(const Target &T, StringRef Triple, + StringRef CPU, StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : TargetMachine(T, Triple, CPU, FS, Options), + DL("e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-" + "f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128") { + CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); +} diff --git a/lib/Target/JSBackend/JSTargetMachine.h b/lib/Target/JSBackend/JSTargetMachine.h index b11533d6922..3912d3b5b29 100644 --- a/lib/Target/JSBackend/JSTargetMachine.h +++ b/lib/Target/JSBackend/JSTargetMachine.h @@ -1,4 +1,4 @@ -//===-- JSTargetMachine.h - TargetMachine for the C++ backend --*- C++ -*-===// +//===-- JSTargetMachine.h - TargetMachine for the JS Backend ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -22,12 +22,14 @@ namespace llvm { class formatted_raw_ostream; -struct JSTargetMachine : public TargetMachine { - JSTargetMachine(const Target &T, StringRef TT, +class JSTargetMachine : public TargetMachine { + const DataLayout DL; + +public: + JSTargetMachine(const Target &T, StringRef Triple, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : TargetMachine(T, TT, CPU, FS, Options) {} + CodeGenOpt::Level OL); virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, @@ -36,12 +38,9 @@ struct JSTargetMachine : public TargetMachine { AnalysisID StartAfter, AnalysisID StopAfter); - virtual const DataLayout *getDataLayout() const { return 0; } + virtual const DataLayout *getDataLayout() const { return &DL; } }; -extern Target TheJSBackendTarget; - } // End llvm namespace - #endif diff --git a/lib/Target/JSBackend/LLVMBuild.txt b/lib/Target/JSBackend/LLVMBuild.txt index 33b433bfb7b..6601b0306ae 100644 --- a/lib/Target/JSBackend/LLVMBuild.txt +++ b/lib/Target/JSBackend/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = TargetInfo +subdirectories = MCTargetDesc TargetInfo [component_0] type = TargetGroup @@ -27,5 +27,5 @@ parent = Target type = Library name = JSBackendCodeGen parent = JSBackend -required_libraries = Core JSBackendInfo Support Target +required_libraries = Core JSBackendInfo JSBackendDesc Support Target add_to_library_groups = JSBackend diff --git a/lib/Target/JSBackend/MCTargetDesc/CMakeLists.txt b/lib/Target/JSBackend/MCTargetDesc/CMakeLists.txt new file mode 100644 index 00000000000..81c5eadef6a --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_library(LLVMJSBackendDesc + JSBackendMCTargetDesc.cpp + ) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) 
diff --git a/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.cpp b/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.cpp new file mode 100644 index 00000000000..f7ba0686839 --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.cpp @@ -0,0 +1,31 @@ +//===-- JSBackendMCTargetDesc.cpp - JS Backend Target Descriptions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides asm.js specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "JSBackendMCTargetDesc.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +static MCCodeGenInfo *createJSBackendMCCodeGenInfo(StringRef Triple, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +// Force static initialization. +extern "C" void LLVMInitializeJSBackendTargetMC() { + // Register the MC codegen info. + RegisterMCCodeGenInfoFn C(TheJSBackendTarget, createJSBackendMCCodeGenInfo); +} diff --git a/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.h b/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.h new file mode 100644 index 00000000000..c98a55df83b --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/JSBackendMCTargetDesc.h @@ -0,0 +1,25 @@ +//===- JSBackendMCTargetDesc.h - JS Backend Target Descriptions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides asm.js specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef JSBACKENDMCTARGETDESC_H +#define JSBACKENDMCTARGETDESC_H + +#include "llvm/Support/TargetRegistry.h" + +namespace llvm { + +extern Target TheJSBackendTarget; + +} // End llvm namespace + +#endif diff --git a/lib/Target/JSBackend/MCTargetDesc/LLVMBuild.txt b/lib/Target/JSBackend/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 00000000000..91127251c9d --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/JSBackend/MCTargetDesc/LLVMBuild.txt --------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = JSBackendDesc +parent = JSBackend +required_libraries = MC Support JSBackendInfo +add_to_library_groups = JSBackend diff --git a/lib/Target/JSBackend/MCTargetDesc/Makefile b/lib/Target/JSBackend/MCTargetDesc/Makefile new file mode 100644 index 00000000000..9bf7e902aff --- /dev/null +++ b/lib/Target/JSBackend/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/JSBackend/MCTargetDesc/Makefile ----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMJSBackendDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/JSBackend/Makefile b/lib/Target/JSBackend/Makefile index 8a15e7f4c18..8d8336bdbd4 100644 --- a/lib/Target/JSBackend/Makefile +++ b/lib/Target/JSBackend/Makefile @@ -9,7 +9,7 @@ LEVEL = ../../.. LIBRARYNAME = LLVMJSBackendCodeGen -DIRS = TargetInfo +DIRS = MCTargetDesc TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/JSBackend/OptPasses.h b/lib/Target/JSBackend/OptPasses.h index 81e3b5ed184..2f90b568b01 100644 --- a/lib/Target/JSBackend/OptPasses.h +++ b/lib/Target/JSBackend/OptPasses.h @@ -1,4 +1,4 @@ -//===-- JSTargetMachine.h - TargetMachine for the C++ backend --*- C++ -*-===// +//===-- OptPasses.h - Extra passes for the JS Backend -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/JSBackend/SimplifyAllocas.cpp b/lib/Target/JSBackend/SimplifyAllocas.cpp index 858ded32a10..a6c090e7e44 100644 --- a/lib/Target/JSBackend/SimplifyAllocas.cpp +++ b/lib/Target/JSBackend/SimplifyAllocas.cpp @@ -1,4 +1,4 @@ -//===-- SimplifyAllocas.cpp - TargetMachine for the C++ backend --*- C++ -*-===// +//===-- SimplifyAllocas.cpp - Alloca optimization ---------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,29 +6,23 @@ // License. See LICENSE.TXT for details. // //===-----------------------------------------------------------------------===// +// +// There shouldn't be any opportunities for this pass to do anything if the +// regular LLVM optimizer passes are run. However, it does make things nicer +// at -O0. +// +//===-----------------------------------------------------------------------===// -#include +#include "OptPasses.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Function.h" -// XXX -#include "llvm/Support/FormattedStream.h" -#include <stdio.h> -#define dump(x) fprintf(stderr, x "\n") -#define dumpv(x, ...) fprintf(stderr, x "\n", __VA_ARGS__) -#define dumpfail(x) { fprintf(stderr, x "\n"); fprintf(stderr, "%s : %d\n", __FILE__, __LINE__); report_fatal_error("fail"); } -#define dumpfailv(x, ...)
{ fprintf(stderr, x "\n", __VA_ARGS__); fprintf(stderr, "%s : %d\n", __FILE__, __LINE__); report_fatal_error("fail"); } -#define dumpIR(value) { \ - std::string temp; \ - raw_string_ostream stream(temp); \ - stream << *(value); \ - fprintf(stderr, "%s\n", temp.c_str()); \ -} +#ifdef NDEBUG #undef assert -#define assert(x) { if (!(x)) dumpfail(#x); } -// XXX +#define assert(x) { if (!(x)) report_fatal_error(#x); } +#endif namespace llvm { @@ -115,4 +109,3 @@ extern FunctionPass *createSimplifyAllocasPass() { } } // End llvm namespace - diff --git a/lib/Target/JSBackend/TargetInfo/JSBackendTargetInfo.cpp b/lib/Target/JSBackend/TargetInfo/JSBackendTargetInfo.cpp index cf06eaceea8..66a3f4d6e83 100644 --- a/lib/Target/JSBackend/TargetInfo/JSBackendTargetInfo.cpp +++ b/lib/Target/JSBackend/TargetInfo/JSBackendTargetInfo.cpp @@ -8,6 +8,7 @@ //===--------------------------------------------------------------------===// #include "JSTargetMachine.h" +#include "MCTargetDesc/JSBackendMCTargetDesc.h" #include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -15,8 +16,21 @@ using namespace llvm; Target llvm::TheJSBackendTarget; static unsigned JSBackend_TripleMatchQuality(const std::string &TT) { - // This class always works, but shouldn't be the default in most cases. - return 1; + switch (Triple(TT).getArch()) { + case Triple::asmjs: + // That's us! + return 20; + + case Triple::le32: + case Triple::x86: + // For compatibility with older versions of Emscripten, we also basically + // support generating code for le32-unknown-nacl and i386-pc-linux-gnu, + // but we use a low number here so that we're not the default. + return 1; + + default: + return 0; + } } extern "C" void LLVMInitializeJSBackendTargetInfo() { @@ -24,5 +38,3 @@ extern "C" void LLVMInitializeJSBackendTargetInfo() { "JavaScript (asm.js, emscripten) backend", &JSBackend_TripleMatchQuality); } - -extern "C" void LLVMInitializeJSBackendTargetMC() {} diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index cbea844e072..0e628c08a6e 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -37,6 +37,7 @@ #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetLibraryInfo.h" #include <algorithm> using namespace llvm; @@ -473,9 +474,17 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, DataLayout *TD, TargetLibraryInfo *TLI) { bool Changed = false; - SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end()); + // Note that we need to use a weak value handle for the worklist items. When + // we delete a constant array, we may also be holding a pointer to one of its + // elements (or an element of one of its elements if we're dealing with an + // array of arrays) in the worklist.
+ SmallVector<WeakVH, 8> WorkList(V->use_begin(), V->use_end()); while (!WorkList.empty()) { - User *U = WorkList.pop_back_val(); + Value *UV = WorkList.pop_back_val(); + if (!UV) + continue; + + User *U = cast<User>(UV); if (LoadInst *LI = dyn_cast<LoadInst>(U)) { if (Init) { diff --git a/lib/Transforms/NaCl/ExpandVarArgs.cpp b/lib/Transforms/NaCl/ExpandVarArgs.cpp index 1b605b79ee2..6f41046850a 100644 --- a/lib/Transforms/NaCl/ExpandVarArgs.cpp +++ b/lib/Transforms/NaCl/ExpandVarArgs.cpp @@ -240,7 +240,8 @@ static bool ExpandVarArgCall(InstType *Call, DataLayout *DL) { // start of the function so that we don't leak space if the function // is called in a loop. Function *Func = Call->getParent()->getParent(); - Instruction *Buf = new AllocaInst(VarArgsTy, "vararg_buffer"); + AllocaInst *Buf = new AllocaInst(VarArgsTy, "vararg_buffer"); + Buf->setAlignment(8); // XXX EMSCRIPTEN: Align for 8-byte aligned doubles. Func->getEntryBlock().getInstList().push_front(Buf); // Call llvm.lifetime.start/end intrinsics to indicate that Buf is diff --git a/lib/Transforms/NaCl/LowerEmExceptionsPass.cpp b/lib/Transforms/NaCl/LowerEmExceptionsPass.cpp index ac07530ab88..3f6f18274fd 100644 --- a/lib/Transforms/NaCl/LowerEmExceptionsPass.cpp +++ b/lib/Transforms/NaCl/LowerEmExceptionsPass.cpp @@ -99,19 +99,19 @@ bool LowerEmExceptions::runOnModule(Module &M) { Type *i8P = i8->getPointerTo(); Type *Void = Type::getVoidTy(M.getContext()); - if (!TheModule->getFunction("getHigh32")) { + if (!(GetHigh = TheModule->getFunction("getHigh32"))) { FunctionType *GetHighFunc = FunctionType::get(i32, false); GetHigh = Function::Create(GetHighFunc, GlobalValue::ExternalLinkage, "getHigh32", TheModule); } - FunctionType *VoidFunc = FunctionType::get(Void, false); - if (!TheModule->getFunction("emscripten_preinvoke")) { + if (!(PreInvoke = TheModule->getFunction("emscripten_preinvoke"))) { + FunctionType *VoidFunc = FunctionType::get(Void, false); PreInvoke = Function::Create(VoidFunc, GlobalValue::ExternalLinkage, "emscripten_preinvoke", TheModule); } - FunctionType *IntFunc = FunctionType::get(i32, false); - if (!TheModule->getFunction("emscripten_postinvoke")) { + if (!(PostInvoke = TheModule->getFunction("emscripten_postinvoke"))) { + FunctionType *IntFunc = FunctionType::get(i32, false); PostInvoke = Function::Create(IntFunc, GlobalValue::ExternalLinkage, "emscripten_postinvoke", TheModule); } diff --git a/lib/Transforms/NaCl/PNaClABISimplify.cpp b/lib/Transforms/NaCl/PNaClABISimplify.cpp index 4deee01a2b9..886e0cc4cce 100644 --- a/lib/Transforms/NaCl/PNaClABISimplify.cpp +++ b/lib/Transforms/NaCl/PNaClABISimplify.cpp @@ -60,7 +60,9 @@ void llvm::PNaClABISimplifyAddPreOptPasses(PassManager &PM) { // LowerExpect converts Intrinsic::expect into branch weights, // which can then be removed after BlockPlacement. +#if 0 // XXX EMSCRIPTEN: We support the expect intrinsic. PM.add(createLowerExpectIntrinsicPass()); +#endif #if 0 // XXX EMSCRIPTEN: We don't need this. // Rewrite unsupported intrinsics to simpler and portable constructs. PM.add(createRewriteLLVMIntrinsicsPass()); @@ -75,13 +77,17 @@ void llvm::PNaClABISimplifyAddPreOptPasses(PassManager &PM) { PM.add(createExpandVarArgsPass()); PM.add(createExpandCtorsPass()); +#if 0 // XXX EMSCRIPTEN: We handle aliases. PM.add(createResolveAliasesPass()); +#endif #if 0 // EMSCRIPTEN: no need for tls PM.add(createExpandTlsPass()); #endif // GlobalCleanup needs to run after ExpandTls because // __tls_template_start etc.
are extern_weak before expansion +#if 0 // XXX EMSCRIPTEN: We don't currently have tls, and we don't have the same complications with extern_weak PM.add(createGlobalCleanupPass()); +#endif } void llvm::PNaClABISimplifyAddPostOptPasses(PassManager &PM) { @@ -138,7 +144,9 @@ void llvm::PNaClABISimplifyAddPostOptPasses(PassManager &PM) { // Remove ``asm("":::"memory")``. This must occur after rewriting // atomics: a ``fence seq_cst`` surrounded by ``asm("":::"memory")`` // has special meaning and is translated differently. +#if 0 // XXX EMSCRIPTEN: asm("":::"memory") doesn't have special semantics. PM.add(createRemoveAsmMemoryPass()); +#endif #if 0 // XXX EMSCRIPTEN: PNaCl replaces pointers with ints to simplify their ABI; emscripten doesn't need this. // ReplacePtrsWithInts assumes that getelementptr instructions and // ConstantExprs have already been expanded out. @@ -156,7 +164,9 @@ void llvm::PNaClABISimplifyAddPostOptPasses(PassManager &PM) { // Strip dead prototypes to appease the intrinsic ABI checks. // ExpandVarArgs leaves around vararg intrinsics, and // ReplacePtrsWithInts leaves the lifetime.start/end intrinsics. +#if 0 // XXX EMSCRIPTEN: We just ignore dead prototypes. PM.add(createStripDeadPrototypesPass()); +#endif // Eliminate simple dead code that the post-opt passes could have // created. diff --git a/lib/Transforms/NaCl/PromoteIntegers.cpp b/lib/Transforms/NaCl/PromoteIntegers.cpp index af34faa7e55..ed374da3dda 100644 --- a/lib/Transforms/NaCl/PromoteIntegers.cpp +++ b/lib/Transforms/NaCl/PromoteIntegers.cpp @@ -577,18 +577,38 @@ void PromoteIntegers::convertInstruction(Instruction *Inst, ConversionState &Sta State.getConverted(Binop->getOperand(1)), Binop->getName() + ".result", Binop), Binop); break; + // XXX EMSCRIPTEN: Implement {U,S}{Div,Rem} + case Instruction::UDiv: + case Instruction::URem: + NewInst = CopyDebug(BinaryOperator::Create( + Binop->getOpcode(), + getClearConverted(Binop->getOperand(0), + Binop, + State), + getClearConverted(Binop->getOperand(1), + Binop, + State), + Binop->getName() + ".result", Binop), Binop); + break; + case Instruction::SDiv: + case Instruction::SRem: + NewInst = CopyDebug(BinaryOperator::Create( + Binop->getOpcode(), + getSignExtend(State.getConverted(Binop->getOperand(0)), + Binop->getOperand(0), + Binop), + getSignExtend(State.getConverted(Binop->getOperand(1)), + Binop->getOperand(0), + Binop), + Binop->getName() + ".result", Binop), Binop); + break; case Instruction::FAdd: case Instruction::FSub: case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: case Instruction::FRem: case Instruction::BinaryOpsEnd: // We should not see FP operators here. - // We don't handle div.
errs() << *Inst << "\n"; llvm_unreachable("Cannot handle binary operator"); break; diff --git a/test/CodeGen/JS/aliases.ll b/test/CodeGen/JS/aliases.ll new file mode 100644 index 00000000000..7818f0de8b9 --- /dev/null +++ b/test/CodeGen/JS/aliases.ll @@ -0,0 +1,53 @@ +; RUN: llc -march=js -o - < %s | FileCheck %s + +; ModuleID = 'test/CodeGen/JS/aliases.ll' +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +@.str = private unnamed_addr constant [18 x i8] c"hello, world! %d\0A\00", align 1 ; [#uses=1 type=[18 x i8]*] + +@othername = alias internal void (i32)* @doit +@othername2 = alias internal void (i32)* @othername +@othername3 = alias internal void (i32)* @othername2 +@othername4 = alias internal bitcast (void (i32)* @othername2 to void ()*) + +@list = global i32 ptrtoint (void ()* @othername4 to i32) +@list2 = global <{ i32, i32, i32, i32, i32 }> <{ i32 ptrtoint (void (i32)* @doit to i32), i32 ptrtoint (void (i32)* @othername to i32), i32 ptrtoint (void (i32)* @othername2 to i32), i32 ptrtoint (void (i32)* @othername3 to i32), i32 ptrtoint (void ()* @othername4 to i32) }> + + +@value = global i32 17 +@value2 = alias i32* @value +@value3 = alias i32* @value + +define internal void @doit(i32 %x) { + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str, i32 0, i32 0), i32 %x) ; [#uses=0 type=i32] + ret void +} + +;;; we just check for compilation to succeed here, specifically of @list and @list2 +; CHECK: function _main() { +; CHECK: } + +define i32 @main() { +entry: + call void ()* @othername4() + %fp = ptrtoint void ()* @othername4 to i32 + %fp1 = add i32 %fp, 0 + %pf = inttoptr i32 %fp1 to void (i32)* + %x = load i32* @value3 + call void (i32)* %pf(i32 %x) + %x1 = load i32* @value2 + call void (i32)* @othername3(i32 %x1) + %x2 = load i32* @value + call void (i32)* @othername2(i32 %x2) + store i32 18, i32* @value + %x3 = load i32* @value + call void (i32)* @othername(i32 %x3) + store i32 19, i32* @value3 + %x4 = load i32* @value3 + call void (i32)* @doit(i32 %x4) + ret i32 1 +} + +declare i32 @printf(i8*, ...) + diff --git a/test/CodeGen/JS/allocamanager.ll b/test/CodeGen/JS/allocamanager.ll new file mode 100644 index 00000000000..c2f7c5f53d6 --- /dev/null +++ b/test/CodeGen/JS/allocamanager.ll @@ -0,0 +1,166 @@ +; RUN: llc -march=js -o - < %s | FileCheck %s + +; Basic AllocaManager feature test. Eliminate user variable cupcake in favor of +; user variable muffin, combine all the vararg buffers, and align the stack +; pointer.
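+; (Roughly: %muffin (117 bytes) and %cupcake (119 bytes) have disjoint
+; lifetimes and can share a single slot, and the four 8-byte-aligned vararg
+; buffers collapse into one, which is how everything below fits in the
+; aligned 128-byte frame the CHECK lines expect.)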
+ +; ModuleID = 'test/CodeGen/JS/allocamanager.ll' +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +%struct._IO_FILE = type opaque + +@stderr = external constant [4 x i8], align 4 +@.str = private unnamed_addr constant [26 x i8] c"hello from %s; argc is %d\00", align 1 +@.str1 = private unnamed_addr constant [33 x i8] c"message from the program: \22%s\22!\0A\00", align 1 +@.str2 = private unnamed_addr constant [38 x i8] c"with argc %d, I, %s, must say goodbye\00", align 1 +@.str3 = private unnamed_addr constant [43 x i8] c"another message from the program: \22%s\22...\0A\00", align 1 + +; CHECK: function _foo($argc,$argv) { +; CHECK-NOT: cupcake +; CHECK: STACKTOP = STACKTOP + 128|0; +; CHECK-NEXT: vararg_buffer0 = +; CHECK-NEXT: $muffin = +; CHECK-NOT: cupcake +; CHECK: } + +; Function Attrs: nounwind +define void @foo(i32 %argc, i8** %argv) #0 { +entry: + %vararg_buffer0 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast10 = bitcast <{ i8* }>* %vararg_buffer0 to i8* + %vararg_buffer5 = alloca <{ i32, i8* }>, align 8 + %vararg_lifetime_bitcast6 = bitcast <{ i32, i8* }>* %vararg_buffer5 to i8* + %vararg_buffer2 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast3 = bitcast <{ i8* }>* %vararg_buffer2 to i8* + %vararg_buffer1 = alloca <{ i8*, i32 }>, align 8 + %vararg_lifetime_bitcast = bitcast <{ i8*, i32 }>* %vararg_buffer1 to i8* + %muffin = alloca [117 x i8], align 1 + %cupcake = alloca [119 x i8], align 1 + %tmp = getelementptr [117 x i8]* %muffin, i32 0, i32 0 + call void @llvm.lifetime.start(i64 117, i8* %tmp) #0 + %tmp1 = load i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast) + %vararg_ptr = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 0 + store i8* %tmp1, i8** %vararg_ptr, align 4 + %vararg_ptr1 = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 1 + store i32 %argc, i32* %vararg_ptr1, align 4 + %call = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i8*, i32 }>*)*)(i8* %tmp, i8* getelementptr inbounds ([26 x i8]* @.str, i32 0, i32 0), <{ i8*, i32 }>* %vararg_buffer1) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast) + %tmp2 = load %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4 + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast3) + %vararg_ptr4 = getelementptr <{ i8* }>* %vararg_buffer2, i32 0, i32 0 + store i8* %tmp, i8** %vararg_ptr4, align 4 + %call2 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp2, i8* getelementptr inbounds ([33 x i8]* @.str1, i32 0, i32 0), <{ i8* }>* %vararg_buffer2) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast3) + call void @llvm.lifetime.end(i64 117, i8* %tmp) #0 + %tmp3 = getelementptr [119 x i8]* %cupcake, i32 0, i32 0 + call void @llvm.lifetime.start(i64 119, i8* %tmp3) #0 + %tmp4 = load i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast6) + %vararg_ptr7 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 0 + store i32 %argc, i32* %vararg_ptr7, align 4 + %vararg_ptr8 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 1 + store i8* %tmp4, i8** %vararg_ptr8, align 4 + %call5 = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i32, i8* }>*)*)(i8* %tmp3, i8* getelementptr inbounds ([38 x i8]* @.str2, i32 
0, i32 0), <{ i32, i8* }>* %vararg_buffer5) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast6) + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast10) + %vararg_ptr11 = getelementptr <{ i8* }>* %vararg_buffer0, i32 0, i32 0 + store i8* %tmp3, i8** %vararg_ptr11, align 4 + %call7 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp2, i8* getelementptr inbounds ([43 x i8]* @.str3, i32 0, i32 0), <{ i8* }>* %vararg_buffer0) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast10) + call void @llvm.lifetime.end(i64 119, i8* %tmp3) #0 + ret void +} + +; CHECK: function _bar($argc,$argv) { +; CHECK-NOT: cupcake +; CHECK: STACKTOP = STACKTOP + 128|0; +; CHECK-NEXT: vararg_buffer0 = +; CHECK-NEXT: $muffin = +; CHECK-NOT: cupcake +; CHECK: } + +; Function Attrs: nounwind +define void @bar(i32 %argc, i8** %argv) #0 { +entry: + %vararg_buffer0 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast10 = bitcast <{ i8* }>* %vararg_buffer0 to i8* + %vararg_buffer5 = alloca <{ i32, i8* }>, align 8 + %vararg_lifetime_bitcast6 = bitcast <{ i32, i8* }>* %vararg_buffer5 to i8* + %vararg_buffer2 = alloca <{ i8* }>, align 8 + %vararg_lifetime_bitcast3 = bitcast <{ i8* }>* %vararg_buffer2 to i8* + %vararg_buffer1 = alloca <{ i8*, i32 }>, align 8 + %vararg_lifetime_bitcast = bitcast <{ i8*, i32 }>* %vararg_buffer1 to i8* + %muffin = alloca [117 x i8], align 1 + %cupcake = alloca [119 x i8], align 1 + %tmp = getelementptr [117 x i8]* %muffin, i32 0, i32 0 + call void @llvm.lifetime.start(i64 117, i8* %tmp) #0 + %cmp = icmp eq i32 %argc, 39 + br i1 %cmp, label %if.end.thread, label %if.end + +if.end.thread: ; preds = %entry + call void @llvm.lifetime.end(i64 117, i8* %tmp) #0 + %tmp1 = getelementptr [119 x i8]* %cupcake, i32 0, i32 0 + call void @llvm.lifetime.start(i64 119, i8* %tmp1) #0 + %.pre = load %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4 + br label %if.then4 + +if.end: ; preds = %entry + %tmp2 = load i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast) + %vararg_ptr = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 0 + store i8* %tmp2, i8** %vararg_ptr, align 4 + %vararg_ptr1 = getelementptr <{ i8*, i32 }>* %vararg_buffer1, i32 0, i32 1 + store i32 %argc, i32* %vararg_ptr1, align 4 + %call = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i8*, i32 }>*)*)(i8* %tmp, i8* getelementptr inbounds ([26 x i8]* @.str, i32 0, i32 0), <{ i8*, i32 }>* %vararg_buffer1) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast) + %tmp3 = load %struct._IO_FILE** bitcast ([4 x i8]* @stderr to %struct._IO_FILE**), align 4 + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast3) + %vararg_ptr4 = getelementptr <{ i8* }>* %vararg_buffer2, i32 0, i32 0 + store i8* %tmp, i8** %vararg_ptr4, align 4 + %call2 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp3, i8* getelementptr inbounds ([33 x i8]* @.str1, i32 0, i32 0), <{ i8* }>* %vararg_buffer2) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast3) + call void @llvm.lifetime.end(i64 117, i8* %tmp) #0 + %tmp4 = getelementptr [119 x i8]* %cupcake, i32 0, i32 0 + call void @llvm.lifetime.start(i64 119, i8* %tmp4) #0 + %cmp3 = icmp eq i32 %argc, 45 + br i1 %cmp3, label %if.end10, label %if.then4 + +if.then4: ; preds = %if.end, 
%if.end.thread + %tmp5 = phi %struct._IO_FILE* [ %.pre, %if.end.thread ], [ %tmp3, %if.end ] + %tmp6 = phi i8* [ %tmp1, %if.end.thread ], [ %tmp4, %if.end ] + %tmp7 = load i8** %argv, align 4 + call void @llvm.lifetime.start(i64 8, i8* %vararg_lifetime_bitcast6) + %vararg_ptr7 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 0 + store i32 %argc, i32* %vararg_ptr7, align 4 + %vararg_ptr8 = getelementptr <{ i32, i8* }>* %vararg_buffer5, i32 0, i32 1 + store i8* %tmp7, i8** %vararg_ptr8, align 4 + %call7 = call i32 bitcast (i32 (i8*, i8*, i8*)* @sprintf to i32 (i8*, i8*, <{ i32, i8* }>*)*)(i8* %tmp6, i8* getelementptr inbounds ([38 x i8]* @.str2, i32 0, i32 0), <{ i32, i8* }>* %vararg_buffer5) #0 + call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast6) + call void @llvm.lifetime.start(i64 4, i8* %vararg_lifetime_bitcast10) + %vararg_ptr11 = getelementptr <{ i8* }>* %vararg_buffer0, i32 0, i32 0 + store i8* %tmp6, i8** %vararg_ptr11, align 4 + %call9 = call i32 bitcast (i32 (%struct._IO_FILE*, i8*, i8*)* @fprintf to i32 (%struct._IO_FILE*, i8*, <{ i8* }>*)*)(%struct._IO_FILE* %tmp5, i8* getelementptr inbounds ([43 x i8]* @.str3, i32 0, i32 0), <{ i8* }>* %vararg_buffer0) #0 + call void @llvm.lifetime.end(i64 4, i8* %vararg_lifetime_bitcast10) + br label %if.end10 + +if.end10: ; preds = %if.then4, %if.end + %tmp8 = phi i8* [ %tmp4, %if.end ], [ %tmp6, %if.then4 ] + call void @llvm.lifetime.end(i64 119, i8* %tmp8) #0 + ret void +} + +; Function Attrs: nounwind +declare i32 @sprintf(i8*, i8*, i8*) #0 + +; Function Attrs: nounwind +declare i32 @fprintf(%struct._IO_FILE*, i8*, i8*) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +attributes #0 = { nounwind } diff --git a/test/CodeGen/JS/asm.ll b/test/CodeGen/JS/asm.ll new file mode 100644 index 00000000000..41a30431da9 --- /dev/null +++ b/test/CodeGen/JS/asm.ll @@ -0,0 +1,13 @@ +; RUN: not llc -march=js < %s + +; Inline asm isn't supported (yet?). llc should report an error when it +; encounters inline asm. +; +; We could support the special case of an empty inline asm string without much +; work, but code that uses such things most likely isn't portable anyway, and +; there are usually much better alternatives. + +define void @foo() { + call void asm "", ""() + ret void +} diff --git a/test/CodeGen/JS/dead-prototypes.ll b/test/CodeGen/JS/dead-prototypes.ll new file mode 100644 index 00000000000..2a723ed61e5 --- /dev/null +++ b/test/CodeGen/JS/dead-prototypes.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=js < %s | not grep printf + +; llc shouldn't emit any code or bookkeeping for unused declarations. + +define void @foo() { + ret void +} + +declare i32 @printf(i8* nocapture, ...) 
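The allocamanager.ll test earlier in this patch exercises the core AllocaManager idea: static allocas whose llvm.lifetime.start/end ranges never overlap may share one frame slot. A minimal sketch of the pattern (a hypothetical test in the same style, not part of the patch; @use and @disjoint are made-up names):

; Two 64-byte allocas with disjoint lifetime ranges; a frame layout
; that assigns both to the same 64-byte slot is legal here.
declare void @use(i8*)
declare void @llvm.lifetime.start(i64, i8* nocapture)
declare void @llvm.lifetime.end(i64, i8* nocapture)

define void @disjoint() {
  %a = alloca [64 x i8], align 4
  %b = alloca [64 x i8], align 4
  %pa = getelementptr [64 x i8]* %a, i32 0, i32 0
  %pb = getelementptr [64 x i8]* %b, i32 0, i32 0
  call void @llvm.lifetime.start(i64 64, i8* %pa)
  call void @use(i8* %pa)
  call void @llvm.lifetime.end(i64 64, i8* %pa)
  call void @llvm.lifetime.start(i64 64, i8* %pb)
  call void @use(i8* %pb)
  call void @llvm.lifetime.end(i64 64, i8* %pb)
  ret void
}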
diff --git a/test/CodeGen/JS/expand-i64.ll b/test/CodeGen/JS/expand-i64.ll index fd468fc6d11..a8a1875fa48 100644 --- a/test/CodeGen/JS/expand-i64.ll +++ b/test/CodeGen/JS/expand-i64.ll @@ -1,6 +1,6 @@ ; RUN: opt -S -expand-illegal-ints < %s | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" ; CHECK: define i32 @add(i32, i32, i32, i32) { ; CHECK: %5 = call i32 @i64Add(i32 %0, i32 %1, i32 %2, i32 %3) diff --git a/test/CodeGen/JS/expect-intrinsics.ll b/test/CodeGen/JS/expect-intrinsics.ll new file mode 100644 index 00000000000..b061f52967b --- /dev/null +++ b/test/CodeGen/JS/expect-intrinsics.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=js < %s | FileCheck %s + +; Handle the llvm.expect intrinsic. + +; CHECK: $expval = $x; +; CHECK: $tobool = ($expval|0)!=(0); +define void @foo(i32 %x) { +entry: + %expval = call i32 @llvm.expect.i32(i32 %x, i32 0) + %tobool = icmp ne i32 %expval, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: + call void @callee() + br label %if.end + +if.end: + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.expect.i32(i32, i32) #0 + +declare void @callee() + +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/JS/ffis-f32.ll b/test/CodeGen/JS/ffis-f32.ll new file mode 100644 index 00000000000..39d3c65b6ce --- /dev/null +++ b/test/CodeGen/JS/ffis-f32.ll @@ -0,0 +1,78 @@ +; RUN: llc -emscripten-precise-f32 -march=js < %s | FileCheck %s + +; Use proper types for FFI calls, with float32 + +; CHECK: (+Math_sqrt(+1)); +; CHECK-NEXT: (Math_fround(Math_sqrt(Math_fround(+1)))); +; CHECK-NEXT: (+Math_sqrt((+$d))); +; CHECK-NEXT: (Math_fround(Math_sqrt((Math_fround($f))))); +; CHECK-NEXT: (+Math_ceil(+1)); +; CHECK-NEXT: (Math_fround(Math_ceil(Math_fround(+1)))); +; CHECK-NEXT: (+Math_floor(+1)); +; CHECK-NEXT: (Math_fround(Math_floor(Math_fround(+1)))); +; CHECK-NEXT: (+_min(+1,+1)); +; CHECK-NEXT: (Math_fround(+(_fmin(+1,+1)))); +; CHECK-NEXT: (+_max(+1,+1)); +; CHECK-NEXT: (Math_fround(+(_fmax(+1,+1)))); +; CHECK-NEXT: (+Math_abs(+1)); +; CHECK-NEXT: (Math_fround(+(_absf(+1)))); +; CHECK-NEXT: (+Math_sin(+1)); +; CHECK-NEXT: (Math_fround(+(Math_sin(+1)))); +define void @foo(i32 %x) { +entry: + %f = fadd float 1.0, 2.0 + %d = fadd double 1.0, 2.0 + + %sqrtd = call double @sqrt(double 1.0) + %sqrtf = call float @sqrtf(float 1.0) + %sqrtdv = call double @sqrt(double %d) ; check vars too + %sqrtfv = call float @sqrtf(float %f) + + %ceild = call double @ceil(double 1.0) + %ceilf = call float @ceilf(float 1.0) + + %floord = call double @floor(double 1.0) + %floorf = call float @floorf(float 1.0) + + ; these could be optimized in theory + + %mind = call double @min(double 1.0, double 1.0) + %minf = call float @fmin(float 1.0, float 1.0) + + %maxd = call double @max(double 1.0, double 1.0) + %maxf = call float @fmax(float 1.0, float 1.0) + + %absd = call double @abs(double 1.0) + %absf = call float @absf(float 1.0) + + ; sin is NOT optimizable with floats + + %sind = call double @sin(double 1.0) + %sinf = call float @sinf(float 1.0) + + ret void +} + +declare double @sqrt(double %x) +declare float @sqrtf(float %x) + +declare double @ceil(double %x) +declare float @ceilf(float %x) + +declare double @floor(double %x) +declare float @floorf(float %x) + +declare double @min(double %x, double %y) +declare float @fmin(float %x, float %y) +
+declare double @max(double %x, double %y) +declare float @fmax(float %x, float %y) + +declare double @abs(double %x) +declare float @absf(float %x) + +declare double @sin(double %x) +declare float @sinf(float %x) + +attributes #0 = { nounwind readnone } + diff --git a/test/CodeGen/JS/ffis.ll b/test/CodeGen/JS/ffis.ll new file mode 100644 index 00000000000..9e3de7bad1f --- /dev/null +++ b/test/CodeGen/JS/ffis.ll @@ -0,0 +1,78 @@ +; RUN: llc -march=js < %s | FileCheck %s + +; Use proper types for FFI calls, no float32 + +; CHECK: (+Math_sqrt(+1)); +; CHECK-NEXT: (+Math_sqrt(+1)); +; CHECK-NEXT: (+Math_sqrt((+$d))); +; CHECK-NEXT: (+Math_sqrt((+$f))); +; CHECK-NEXT: (+Math_ceil(+1)); +; CHECK-NEXT: (+Math_ceil(+1)); +; CHECK-NEXT: (+Math_floor(+1)); +; CHECK-NEXT: (+Math_floor(+1)); +; CHECK-NEXT: (+_min(+1,+1)); +; CHECK-NEXT: (+_fmin(+1,+1)); +; CHECK-NEXT: (+_max(+1,+1)); +; CHECK-NEXT: (+_fmax(+1,+1)); +; CHECK-NEXT: (+Math_abs(+1)); +; CHECK-NEXT: (+_absf(+1)); +; CHECK-NEXT: (+Math_sin(+1)); +; CHECK-NEXT: (+Math_sin(+1)); +define void @foo(i32 %x) { +entry: + %f = fadd float 1.0, 2.0 + %d = fadd double 1.0, 2.0 + + %sqrtd = call double @sqrt(double 1.0) + %sqrtf = call float @sqrtf(float 1.0) + %sqrtdv = call double @sqrt(double %d) ; check vars too + %sqrtfv = call float @sqrtf(float %f) + + %ceild = call double @ceil(double 1.0) + %ceilf = call float @ceilf(float 1.0) + + %floord = call double @floor(double 1.0) + %floorf = call float @floorf(float 1.0) + + ; these could be optimized in theory + + %mind = call double @min(double 1.0, double 1.0) + %minf = call float @fmin(float 1.0, float 1.0) + + %maxd = call double @max(double 1.0, double 1.0) + %maxf = call float @fmax(float 1.0, float 1.0) + + %absd = call double @abs(double 1.0) + %absf = call float @absf(float 1.0) + + ; sin is NOT optimizable with floats + + %sind = call double @sin(double 1.0) + %sinf = call float @sinf(float 1.0) + + ret void +} + +declare double @sqrt(double %x) +declare float @sqrtf(float %x) + +declare double @ceil(double %x) +declare float @ceilf(float %x) + +declare double @floor(double %x) +declare float @floorf(float %x) + +declare double @min(double %x, double %y) +declare float @fmin(float %x, float %y) + +declare double @max(double %x, double %y) +declare float @fmax(float %x, float %y) + +declare double @abs(double %x) +declare float @absf(float %x) + +declare double @sin(double %x) +declare float @sinf(float %x) + +attributes #0 = { nounwind readnone } + diff --git a/test/CodeGen/JS/getelementptr.ll b/test/CodeGen/JS/getelementptr.ll index 22919097785..2dbb868b94e 100644 --- a/test/CodeGen/JS/getelementptr.ll +++ b/test/CodeGen/JS/getelementptr.ll @@ -2,7 +2,7 @@ ; Test simple getelementptr codegen. -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" ; CHECK: function _getelementptr([[VAL_P:\$[a-z_]+]]) { ; CHECK: [[GEP:\$[a-z_]+]] = (([[GEPINT:\$[a-z_]+]]) + 588)|0; diff --git a/test/CodeGen/JS/global-alias.ll b/test/CodeGen/JS/global-alias.ll new file mode 100644 index 00000000000..b6efc0e7e7a --- /dev/null +++ b/test/CodeGen/JS/global-alias.ll @@ -0,0 +1,56 @@ +; RUN: llc -march=js < %s | FileCheck %s + +; Handle global aliases of various kinds.
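+; Whether an alias is internal, weak, or ordinary, it should fold to the
+; address of the aliasee: the [[PRI]] and [[PUB]] capture groups below
+; require every function in each group to return one shared address.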
+ +@pri = internal global [60 x i8] zeroinitializer +@pub = global [60 x i8] zeroinitializer + +@pri_int = alias internal [60 x i8]* @pri +@pri_wea = alias weak [60 x i8]* @pri +@pri_nor = alias [60 x i8]* @pri + +@pub_int = alias internal [60 x i8]* @pub +@pub_wea = alias weak [60 x i8]* @pub +@pub_nor = alias [60 x i8]* @pub + +; CHECK: test0( +; CHECK: return ([[PRI:[0-9]+]]|0); +define [60 x i8]* @test0() { + ret [60 x i8]* @pri +} +; CHECK: test1( +; CHECK: return ([[PRI]]|0); +define [60 x i8]* @test1() { + ret [60 x i8]* @pri_int +} +; CHECK: test2( +; CHECK: return ([[PRI]]|0); +define [60 x i8]* @test2() { + ret [60 x i8]* @pri_wea +} +; CHECK: test3( +; CHECK: return ([[PRI]]|0); +define [60 x i8]* @test3() { + ret [60 x i8]* @pri_nor +} + +; CHECK: test4( +; CHECK: return ([[PUB:[0-9]+]]|0); +define [60 x i8]* @test4() { + ret [60 x i8]* @pub +} +; CHECK: test5( +; CHECK: return ([[PUB]]|0); +define [60 x i8]* @test5() { + ret [60 x i8]* @pub_int +} +; CHECK: test6( +; CHECK: return ([[PUB]]|0); +define [60 x i8]* @test6() { + ret [60 x i8]* @pub_wea +} +; CHECK: test7( +; CHECK: return ([[PUB]]|0); +define [60 x i8]* @test7() { + ret [60 x i8]* @pub_nor +} diff --git a/test/CodeGen/JS/invariant-intrinsics.ll b/test/CodeGen/JS/invariant-intrinsics.ll new file mode 100644 index 00000000000..dc156a9ffbf --- /dev/null +++ b/test/CodeGen/JS/invariant-intrinsics.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=js < %s | not grep invariant + +; llc currently emits no code or bookkeeping for invariant intrinsic calls +; or declarations. + +declare void @bar(i8*) + +define void @foo() { + %p = alloca i8 + %i = call {}* @llvm.invariant.start(i64 1, i8* %p) + call void @bar(i8* %p) + call void @llvm.invariant.end({}* %i, i64 1, i8* %p) + ret void +} + +declare {}* @llvm.invariant.start(i64, i8* nocapture) +declare void @llvm.invariant.end({}*, i64, i8* nocapture) diff --git a/test/CodeGen/JS/lifetime-intrinsics.ll b/test/CodeGen/JS/lifetime-intrinsics.ll new file mode 100644 index 00000000000..e3d8560e301 --- /dev/null +++ b/test/CodeGen/JS/lifetime-intrinsics.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=js < %s | not grep lifetime + +; llc currently emits no code or bookkeeping for lifetime intrinsic calls +; or declarations. + +declare void @bar(i8*) + +define void @foo() { + %p = alloca i8 + call void @llvm.lifetime.start(i64 1, i8* %p) + call void @bar(i8* %p) + call void @llvm.lifetime.end(i64 1, i8* %p) + ret void +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) +declare void @llvm.lifetime.end(i64, i8* nocapture) diff --git a/test/CodeGen/JS/mem-intrinsics.ll b/test/CodeGen/JS/mem-intrinsics.ll new file mode 100644 index 00000000000..995d70d5351 --- /dev/null +++ b/test/CodeGen/JS/mem-intrinsics.ll @@ -0,0 +1,53 @@ +; RUN: llc -march=js < %s | FileCheck %s + +; llc should emit small aligned memcpy and memset inline. 
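+; The three cases below illustrate the strategies: a 32-byte copy or set is
+; fully unrolled, a 64-byte one becomes a compact loop, and a 65536-byte one
+; falls back to an actual memcpy/memset call.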
+ +; CHECK: test_unrolled_memcpy +; CHECK: HEAP32[$d+0>>2]=HEAP32[$s+0>>2]|0;HEAP32[$d+4>>2]=HEAP32[$s+4>>2]|0;HEAP32[$d+8>>2]=HEAP32[$s+8>>2]|0;HEAP32[$d+12>>2]=HEAP32[$s+12>>2]|0;HEAP32[$d+16>>2]=HEAP32[$s+16>>2]|0;HEAP32[$d+20>>2]=HEAP32[$s+20>>2]|0;HEAP32[$d+24>>2]=HEAP32[$s+24>>2]|0;HEAP32[$d+28>>2]=HEAP32[$s+28>>2]|0; +define void @test_unrolled_memcpy(i8* %d, i8* %s) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 32, i32 4, i1 false) + ret void +} + +; CHECK: test_loop_memcpy +; CHECK: dest=$d+0|0; src=$s+0|0; stop=dest+64|0; do { HEAP32[dest>>2]=HEAP32[src>>2]|0; dest=dest+4|0; src=src+4|0; } while ((dest|0) < (stop|0)) +define void @test_loop_memcpy(i8* %d, i8* %s) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 64, i32 4, i1 false) + ret void +} + +; CHECK: test_call_memcpy +; CHECK: memcpy(($d|0),($s|0),65536) +define void @test_call_memcpy(i8* %d, i8* %s) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 65536, i32 4, i1 false) + ret void +} + +; CHECK: test_unrolled_memset +; CHECK: HEAP32[$d+0>>2]=0|0;HEAP32[$d+4>>2]=0|0;HEAP32[$d+8>>2]=0|0;HEAP32[$d+12>>2]=0|0;HEAP32[$d+16>>2]=0|0;HEAP32[$d+20>>2]=0|0;HEAP32[$d+24>>2]=0|0;HEAP32[$d+28>>2]=0|0; +define void @test_unrolled_memset(i8* %d, i8* %s) { + call void @llvm.memset.p0i8.i32(i8* %d, i8 0, i32 32, i32 4, i1 false) + ret void +} + +; CHECK: test_loop_memset +; CHECK: dest=$d+0|0; stop=dest+64|0; do { HEAP32[dest>>2]=0|0; dest=dest+4|0; } while ((dest|0) < (stop|0)); +define void @test_loop_memset(i8* %d, i8* %s) { + call void @llvm.memset.p0i8.i32(i8* %d, i8 0, i32 64, i32 4, i1 false) + ret void +} + +; CHECK: test_call_memset +; CHECK: memset(($d|0),0,65536) +define void @test_call_memset(i8* %d, i8* %s) { + call void @llvm.memset.p0i8.i32(i8* %d, i8 0, i32 65536, i32 4, i1 false) + ret void +} + +; Also, don't emit declarations for the intrinsic functions. +; CHECK-NOT: p0i8 + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 + +attributes #0 = { nounwind } diff --git a/test/CodeGen/JS/phi.ll b/test/CodeGen/JS/phi.ll new file mode 100644 index 00000000000..517f73cba39 --- /dev/null +++ b/test/CodeGen/JS/phi.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=js < %s | FileCheck %s + +; Phi lowering should check for dependency cycles, including looking through +; bitcasts, and emit extra copies as needed. 
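+; (A naive lowering like $j = $k; $k = $j; would overwrite $j before its old
+; value is read; the $j$phi/$k$phi temporaries in the CHECK line implement
+; the parallel copy that phi semantics require.)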
+ +; CHECK: while(1) { +; CHECK: $k$phi = $j;$j$phi = $k;$k = $k$phi;$j = $j$phi; +; CHECK: } +define void @foo(float* nocapture %p, i32* %j.init, i32* %k.init) { +entry: + br label %for.body + +for.body: + %j = phi i32* [ %j.init, %entry ], [ %k.cast, %more ] + %k = phi i32* [ %k.init, %entry ], [ %j.cast, %more ] + br label %more + +more: + %j.cast = bitcast i32* %j to i32* + %k.cast = bitcast i32* %k to i32* + br label %for.body +} diff --git a/test/Transforms/GlobalOpt/array-elem-refs.ll b/test/Transforms/GlobalOpt/array-elem-refs.ll new file mode 100644 index 00000000000..ec472b0e99f --- /dev/null +++ b/test/Transforms/GlobalOpt/array-elem-refs.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -S -globalopt | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.S = type { i8, i8 } + +@c = internal global i8** bitcast (i8* getelementptr (i8* bitcast ([8 x i8*]* @b to i8*), i64 48) to i8**), align 8 +@b = internal global [8 x i8*] [i8* null, i8* null, i8* null, i8* null, i8* null, i8* null, i8* getelementptr inbounds (%struct.S* @a, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S* @a, i32 0, i32 0), i64 1)], align 16 +@a = internal global %struct.S zeroinitializer, align 1 + +; Function Attrs: nounwind uwtable +define signext i8 @foo() #0 { +entry: + %0 = load i8*** @c, align 8 + %1 = load i8** %0, align 8 + %2 = load i8* %1, align 1 + ret i8 %2 + +; CHECK-LABEL: @foo +; CHECK: ret i8 0 +} + +; Function Attrs: nounwind uwtable +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + ret i32 0 +} + +attributes #0 = { nounwind uwtable } + diff --git a/test/Transforms/NaCl/promote-integers.ll b/test/Transforms/NaCl/promote-integers.ll index 7c010be32bf..baab0822cd0 100644 --- a/test/Transforms/NaCl/promote-integers.ll +++ b/test/Transforms/NaCl/promote-integers.ll @@ -228,6 +228,62 @@ define void @ashr1(i16 %a) { ret void } +; CHECK: @udiv1 +define void @udiv1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = zext i32 %a to i64 + %a33 = zext i32 %a to i33 +; CHECK-NEXT: %b33 = zext i32 %b to i64 + %b33 = zext i32 %b to i33 +; CHECK-NEXT: %a33.clear = and i64 %a33, 8589934591 +; CHECK-NEXT: %b33.clear = and i64 %b33, 8589934591 +; CHECK-NEXT: %result = udiv i64 %a33.clear, %b33.clear + %result = udiv i33 %a33, %b33 + ret void +} + +; CHECK: @sdiv1 +define void @sdiv1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = sext i32 %a to i64 + %a33 = sext i32 %a to i33 +; CHECK-NEXT: %b33 = sext i32 %b to i64 +; CHECK-NEXT: %a33.getsign = shl i64 %a33, 31 +; CHECK-NEXT: %a33.signed = ashr i64 %a33.getsign, 31 +; CHECK-NEXT: %b33.getsign = shl i64 %b33, 31 +; CHECK-NEXT: %b33.signed = ashr i64 %b33.getsign, 31 + %b33 = sext i32 %b to i33 +; CHECK-NEXT: %result = sdiv i64 %a33.signed, %b33.signed + %result = sdiv i33 %a33, %b33 + ret void +} + +; CHECK: @urem1 +define void @urem1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = zext i32 %a to i64 + %a33 = zext i32 %a to i33 +; CHECK-NEXT: %b33 = zext i32 %b to i64 +; CHECK-NEXT: %a33.clear = and i64 %a33, 8589934591 +; CHECK-NEXT: %b33.clear = and i64 %b33, 8589934591 + %b33 = zext i32 %b to i33 +; CHECK-NEXT: %result = urem i64 %a33.clear, %b33.clear + %result = urem i33 %a33, %b33 + ret void +} + +; CHECK: @srem1 +define void @srem1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = sext i32 %a to i64 + %a33 = sext i32 %a to i33 +; CHECK-NEXT: %b33 = sext 
i32 %b to i64 +; CHECK-NEXT: %a33.getsign = shl i64 %a33, 31 +; CHECK-NEXT: %a33.signed = ashr i64 %a33.getsign, 31 +; CHECK-NEXT: %b33.getsign = shl i64 %b33, 31 +; CHECK-NEXT: %b33.signed = ashr i64 %b33.getsign, 31 + %b33 = sext i32 %b to i33 +; CHECK-NEXT: %result = srem i64 %a33.signed, %b33.signed + %result = srem i33 %a33, %b33 + ret void +} + ; CHECK: @phi_icmp define void @phi_icmp(i32 %a) { entry: