diff --git a/emscripten-version.txt b/emscripten-version.txt index c241ad3bb2a..8b65a411073 100644 --- a/emscripten-version.txt +++ b/emscripten-version.txt @@ -1,2 +1,2 @@ -1.21.6 +1.21.7 diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index f1092638941..74851002f33 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -308,6 +308,7 @@ void initializeStripMetadataPass(PassRegistry&); void initializeExpandI64Pass(PassRegistry&); // XXX EMSCRIPTEN void initializeLowerEmExceptionsPass(PassRegistry&); // XXX EMSCRIPTEN void initializeLowerEmSetjmpPass(PassRegistry&); // XXX EMSCRIPTEN +void initializeLowerEmAsyncifyPass(PassRegistry&); // XXX EMSCRIPTEN void initializeNoExitRuntimePass(PassRegistry&); // XXX EMSCRIPTEN // @LOCALMOD-END } diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h index 7449dab9699..b3bf0c76b99 100644 --- a/include/llvm/Transforms/NaCl.h +++ b/include/llvm/Transforms/NaCl.h @@ -54,6 +54,7 @@ ModulePass *createExpandI64Pass(); // XXX EMSCRIPTEN ModulePass *createLowerEmExceptionsPass(); // XXX EMSCRIPTEN ModulePass *createLowerEmSetjmpPass(); // XXX EMSCRIPTEN ModulePass *createNoExitRuntimePass(); // XXX EMSCRIPTEN +ModulePass *createLowerEmAsyncifyPass(); // XXX EMSCRIPTEN void PNaClABISimplifyAddPreOptPasses(PassManager &PM); void PNaClABISimplifyAddPostOptPasses(PassManager &PM); diff --git a/lib/Target/JSBackend/CallHandlers.h b/lib/Target/JSBackend/CallHandlers.h index 5f001429eec..5eda93dfac1 100644 --- a/lib/Target/JSBackend/CallHandlers.h +++ b/lib/Target/JSBackend/CallHandlers.h @@ -189,6 +189,27 @@ DEF_CALL_HANDLER(emscripten_get_longjmp_result, { return getAssign(CI) + "tempRet0"; }) +// supporting async functions, see `/src/library_async.js` for detail. 
+DEF_CALL_HANDLER(emscripten_alloc_async_context, { + // insert sp as the 2nd parameter + return getAssign(CI) + "_emscripten_alloc_async_context(" + getValueAsStr(CI->getOperand(0)) + ",sp)|0"; +}) +DEF_CALL_HANDLER(emscripten_check_async, { + return getAssign(CI) + "___async"; +}) +// prevent unwinding the stack +// preserve the return value of the return inst +DEF_CALL_HANDLER(emscripten_do_not_unwind, { + return "sp = STACKTOP"; +}) +// prevent unwinding the async stack +DEF_CALL_HANDLER(emscripten_do_not_unwind_async, { + return "___async_unwind = 0"; +}) +DEF_CALL_HANDLER(emscripten_get_async_return_value_addr, { + return getAssign(CI) + "___async_retval"; +}) + // emscripten instrinsics DEF_CALL_HANDLER(emscripten_debugger, { CanValidate = false; @@ -560,6 +581,11 @@ void setupCallHandlers() { SETUP_CALL_HANDLER(emscripten_longjmp); SETUP_CALL_HANDLER(emscripten_check_longjmp); SETUP_CALL_HANDLER(emscripten_get_longjmp_result); + SETUP_CALL_HANDLER(emscripten_alloc_async_context); + SETUP_CALL_HANDLER(emscripten_check_async); + SETUP_CALL_HANDLER(emscripten_do_not_unwind); + SETUP_CALL_HANDLER(emscripten_do_not_unwind_async); + SETUP_CALL_HANDLER(emscripten_get_async_return_value_addr); SETUP_CALL_HANDLER(emscripten_debugger); SETUP_CALL_HANDLER(getHigh32); SETUP_CALL_HANDLER(setHigh32); diff --git a/lib/Transforms/NaCl/CMakeLists.txt b/lib/Transforms/NaCl/CMakeLists.txt index dd76491d659..9068483f148 100644 --- a/lib/Transforms/NaCl/CMakeLists.txt +++ b/lib/Transforms/NaCl/CMakeLists.txt @@ -30,6 +30,7 @@ add_llvm_library(LLVMNaClTransforms StripMetadata.cpp LowerEmExceptionsPass.cpp LowerEmSetjmp.cpp + LowerEmAsyncify.cpp NoExitRuntime.cpp ) diff --git a/lib/Transforms/NaCl/LowerEmAsyncify.cpp b/lib/Transforms/NaCl/LowerEmAsyncify.cpp new file mode 100644 index 00000000000..8e9a42426d9 --- /dev/null +++ b/lib/Transforms/NaCl/LowerEmAsyncify.cpp @@ -0,0 +1,728 @@ +//===- LowerEmAsyncify - transform asynchronous functions for Emscripten/JS -----------===// 
+// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Lu Wang +// +// In JS we don't have functions like sleep(), which is on the other hand very popular in C/C++ etc. +// This pass tries to convert functions calling sleep() into a valid form in JavaScript +// The basic idea is to split the callee at the place where sleep() is called, +// then the first half may schedule the second half using setTimeout. +// But we need to pay lots of attention to analyzing/saving/restoring context variables and return values +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" // for DemoteRegToStack +#include "llvm/Transforms/Utils/PromoteMemToReg.h" // for PromoteMemToReg +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Pass.h" + +#include + +#ifdef NDEBUG +#undef assert +#define assert(x) { if (!(x)) report_fatal_error(#x); } +#endif + +using namespace llvm; + +static cl::list +AsyncifyFunctions("emscripten-asyncify-functions", + cl::desc("Functions that call one of these functions, directly or indirectly, will be asyncified"), + cl::CommaSeparated); + +static cl::list +AsyncifyWhiteList("emscripten-asyncify-whitelist", + cl::desc("Functions that should not be asyncified"), + 
cl::CommaSeparated); + +namespace { + class LowerEmAsyncify: public ModulePass { + Module *TheModule; + + public: + static char ID; // Pass identification, replacement for typeid + explicit LowerEmAsyncify() : ModulePass(ID), TheModule(NULL) { + initializeLowerEmAsyncifyPass(*PassRegistry::getPassRegistry()); + } + virtual ~LowerEmAsyncify() { } + bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + ModulePass::getAnalysisUsage(AU); + } + + private: + DataLayout *DL; + + Type *Void, *I1, *I32, *I32Ptr; + FunctionType *VFunction, *I1Function, *I32PFunction; + FunctionType *VI32PFunction, *I32PI32Function; + FunctionType *CallbackFunctionType; + + Function *AllocAsyncCtxFunction, *ReallocAsyncCtxFunction, *FreeAsyncCtxFunction; + Function *CheckAsyncFunction; + Function *DoNotUnwindFunction, *DoNotUnwindAsyncFunction; + Function *GetAsyncReturnValueAddrFunction; + + void initTypesAndFunctions(void); + + typedef std::vector Instructions; + typedef DenseMap FunctionInstructionsMap; + typedef std::vector Values; + typedef SmallPtrSet BasicBlockSet; + + // all the information we want for an async call + struct AsyncCallEntry { + Instruction *AsyncCallInst; // calling an async function + BasicBlock *AfterCallBlock; // the block we should continue on after getting the return value of AsynCallInst + CallInst *AllocAsyncCtxInst; // where we allocate the async ctx before the async call, in the original function + Values ContextVariables; // those need to be saved and restored for the async call + StructType *ContextStructType; // The structure constructing all the context variables + BasicBlock *SaveAsyncCtxBlock; // the block in which we save all the variables + Function *CallbackFunc; // the callback function for this async call, which is converted from the original function + }; + + BasicBlockSet FindReachableBlocksFrom(BasicBlock *src); + + // Find everything that we should save and restore for the async call + // 
save them to Entry.ContextVariables + void FindContextVariables(AsyncCallEntry & Entry); + + // The essential function + // F is now in the sync form, transform it into an async form that is valid in JS + void transformAsyncFunction(Function &F, Instructions const& AsyncCalls); + + bool IsFunctionPointerCall(const Instruction *I); + }; +} + +char LowerEmAsyncify::ID = 0; +INITIALIZE_PASS(LowerEmAsyncify, "loweremasyncify", + "Lower async functions for js/emscripten", + false, false) + +bool LowerEmAsyncify::runOnModule(Module &M) { + TheModule = &M; + + std::set WhiteList(AsyncifyWhiteList.begin(), AsyncifyWhiteList.end()); + + /* + * collect all the functions that should be asyncified + * any function that _might_ call an async function is also async + */ + std::vector AsyncFunctionsPending; + for(unsigned i = 0; i < AsyncifyFunctions.size(); ++i) { + std::string const& AFName = AsyncifyFunctions[i]; + Function *F = TheModule->getFunction(AFName); + if (F && !WhiteList.count(F->getName())) { + AsyncFunctionsPending.push_back(F); + } + } + + // No function needed to transform + if (AsyncFunctionsPending.empty()) return false; + + // Walk through the call graph and find all the async functions + FunctionInstructionsMap AsyncFunctionCalls; + { + // pessimistic: consider all indirect calls as possibly async + // TODO: deduce based on function types + for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); FI != FE; ++FI) { + if (WhiteList.count(FI->getName())) continue; + + bool has_indirect_call = false; + for (inst_iterator I = inst_begin(FI), E = inst_end(FI); I != E; ++I) { + if (IsFunctionPointerCall(&*I)) { + has_indirect_call = true; + AsyncFunctionCalls[FI].push_back(&*I); + } + } + + if (has_indirect_call) AsyncFunctionsPending.push_back(FI); + } + + while (!AsyncFunctionsPending.empty()) { + Function *CurFunction = AsyncFunctionsPending.back(); + AsyncFunctionsPending.pop_back(); + + for (Value::use_iterator UI = CurFunction->use_begin(), E = 
CurFunction->use_end(); UI != E; ++UI) { + ImmutableCallSite ICS(*UI); + if (!ICS) continue; + // we only need those instructions calling the function + // if the function address is used for other purpose, we don't care + if (CurFunction != ICS.getCalledValue()->stripPointerCasts()) continue; + // Now I is either CallInst or InvokeInst + Instruction *I = cast(*UI); + Function *F = I->getParent()->getParent(); + if (AsyncFunctionCalls.count(F) == 0) { + AsyncFunctionsPending.push_back(F); + } + AsyncFunctionCalls[F].push_back(I); + } + } + } + + // exit if no async function is found at all + if (AsyncFunctionCalls.empty()) return false; + + initTypesAndFunctions(); + + for (FunctionInstructionsMap::iterator I = AsyncFunctionCalls.begin(), E = AsyncFunctionCalls.end(); + I != E; ++I) { + transformAsyncFunction(*(I->first), I->second); + } + + return true; +} + +void LowerEmAsyncify::initTypesAndFunctions(void) { + DL = &getAnalysis(); + + // Data types + Void = Type::getVoidTy(TheModule->getContext()); + I1 = Type::getInt1Ty(TheModule->getContext()); + I32 = Type::getInt32Ty(TheModule->getContext()); + I32Ptr = Type::getInt32PtrTy(TheModule->getContext()); + + // Function types + SmallVector ArgTypes; + VFunction = FunctionType::get(Void, false); + I1Function = FunctionType::get(I1, false); + I32PFunction = FunctionType::get(I32Ptr, false); + + ArgTypes.clear(); + ArgTypes.push_back(I32Ptr); + VI32PFunction = FunctionType::get(Void, ArgTypes, false); + + ArgTypes.clear(); + ArgTypes.push_back(I32); + I32PI32Function = FunctionType::get(I32Ptr, ArgTypes, false); + + CallbackFunctionType = VI32PFunction; + + // Functions + CheckAsyncFunction = Function::Create( + I1Function, + GlobalValue::ExternalLinkage, + "emscripten_check_async", + TheModule + ); + + AllocAsyncCtxFunction = Function::Create( + I32PI32Function, + GlobalValue::ExternalLinkage, + "emscripten_alloc_async_context", + TheModule + ); + + ReallocAsyncCtxFunction = Function::Create( + I32PI32Function, + 
GlobalValue::ExternalLinkage, + "emscripten_realloc_async_context", + TheModule + ); + + FreeAsyncCtxFunction = Function::Create( + VI32PFunction, + GlobalValue::ExternalLinkage, + "emscripten_free_async_context", + TheModule + ); + + DoNotUnwindFunction = Function::Create( + VFunction, + GlobalValue::ExternalLinkage, + "emscripten_do_not_unwind", + TheModule + ); + + DoNotUnwindAsyncFunction = Function::Create( + VFunction, + GlobalValue::ExternalLinkage, + "emscripten_do_not_unwind_async", + TheModule + ); + + GetAsyncReturnValueAddrFunction = Function::Create( + I32PFunction, + GlobalValue::ExternalLinkage, + "emscripten_get_async_return_value_addr", + TheModule + ); +} + +LowerEmAsyncify::BasicBlockSet LowerEmAsyncify::FindReachableBlocksFrom(BasicBlock *src) { + BasicBlockSet ReachableBlockSet; + std::vector pending; + ReachableBlockSet.insert(src); + pending.push_back(src); + while (!pending.empty()) { + BasicBlock *CurBlock = pending.back(); + pending.pop_back(); + for (succ_iterator SI = succ_begin(CurBlock), SE = succ_end(CurBlock); SI != SE; ++SI) { + if (ReachableBlockSet.count(*SI) == 0) { + ReachableBlockSet.insert(*SI); + pending.push_back(*SI); + } + } + } + return ReachableBlockSet; +} + +void LowerEmAsyncify::FindContextVariables(AsyncCallEntry & Entry) { + BasicBlock *AfterCallBlock = Entry.AfterCallBlock; + + Function & F = *AfterCallBlock->getParent(); + + // Create a new entry block as if in the callback function + // then check variables that no longer properly dominate their uses + BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "", &F, &F.getEntryBlock()); + BranchInst::Create(AfterCallBlock, EntryBlock); + + DominatorTree DT; + DT.runOnFunction(F); + + // These blocks may be using some values defined at or before AsyncCallBlock + BasicBlockSet Ramifications = FindReachableBlocksFrom(AfterCallBlock); + + SmallPtrSet ContextVariables; + Values Pending; + + // Examine the instructions, find all variables that we need to 
store in the context + for (BasicBlockSet::iterator RI = Ramifications.begin(), RE = Ramifications.end(); RI != RE; ++RI) { + for (BasicBlock::iterator I = (*RI)->begin(), E = (*RI)->end(); I != E; ++I) { + for (unsigned i = 0, NumOperands = I->getNumOperands(); i < NumOperands; ++i) { + Value *O = I->getOperand(i); + if (Instruction *Inst = dyn_cast(O)) { + if (Inst == Entry.AsyncCallInst) continue; // for the original async call, we will load directly from async return value + if (ContextVariables.count(Inst) != 0) continue; // already examined + + if (!DT.dominates(Inst, I->getOperandUse(i))) { + // `I` is using `Inst`, yet `Inst` does not dominate `I` if we arrive directly at AfterCallBlock + // so we need to save `Inst` in the context + ContextVariables.insert(Inst); + Pending.push_back(Inst); + } + } else if (Argument *Arg = dyn_cast(O)) { + // count() should be as fast/slow as insert, so just insert here + ContextVariables.insert(Arg); + } + } + } + } + + // restore F + EntryBlock->eraseFromParent(); + + Entry.ContextVariables.clear(); + Entry.ContextVariables.reserve(ContextVariables.size()); + for (SmallPtrSet::iterator I = ContextVariables.begin(), E = ContextVariables.end(); I != E; ++I) { + Entry.ContextVariables.push_back(*I); + } +} + +/* + * Consider that F contains a call to G, both of which are async: + * + * function F: + * ... + * %0 = G(%1, %2, ...); + * ... + * return %%; + * + * We want to convert F and generate F__asyn_cb + * they are similar, but with minor yet important differences + * Note those `main func only` and `callback func only` instructions + +////////////////////////////////////////////////////////// + function F: + ... 
+ ctx = alloc_ctx(len, sp); // main func only + // TODO + // we could also do this only after an async call + // but in that case we will need to pass ctx to the function + // since ctx is no longer in the top async stack frame + %0 = G(%1, %2, ...); + if (async) { // G was async + save context variables in ctx + register F.async_cb as the callback in frame + return without unwinding the stack frame + } else { // G was sync + // use %0 as normal + free_ctx(ctx); // main func only + // ctx is freed here, because so far F is still a sync function + // and we don't want any side effects + ... + async return value = %%; + return & normally unwind the stack frame // main func only + } +////////////////////////////////////////////////////////// + + * And here's F.async_cb + +////////////////////////////////////////////////////////// + function F.async_cb(ctx): + load variables from ctx // callback func only + goto resume_point; // callback func only + ... + ctx = realloc_ctx(len); // callback func only + // realloc_ctx is different from alloc_ctx + // which reused the current async stack frame + // we want to keep the saved stack pointer + %0 = G(%1, %2, ...); + if (async) { + save context variables in ctx + register F.async_cb as the callback + return without unwinding the stack frame + } else { + resume_point: + %0'= either $0 or the async return value // callback func only + ... 
+ async return value = %% + return restore the stack pointer back to the value stored in F // callback func only + // no need to free the ctx + // the scheduler will be aware of this return and handle the stack frames + } +////////////////////////////////////////////////////////// + + */ + +void LowerEmAsyncify::transformAsyncFunction(Function &F, Instructions const& AsyncCalls) { + assert(!AsyncCalls.empty()); + + // Pass 0 + // collect all the return instructions from the original function + // will use later + std::vector OrigReturns; + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { + if (ReturnInst *RI = dyn_cast(&*I)) { + OrigReturns.push_back(RI); + } + } + + // Pass 1 + // Scan each async call and make the basic structure: + // All these will be cloned into the callback functions + // - allocate the async context before calling an async function + // - check async right after calling an async function, save context & return if async, continue if not + // - retrieve the async return value and free the async context if the called function turns out to be sync + std::vector AsyncCallEntries; + AsyncCallEntries.reserve(AsyncCalls.size()); + for (Instructions::const_iterator I = AsyncCalls.begin(), E = AsyncCalls.end(); I != E; ++I) { + // prepare blocks + Instruction *CurAsyncCall = *I; + + // The block containing the async call + BasicBlock *CurBlock = CurAsyncCall->getParent(); + // The block should run after the async call + BasicBlock *AfterCallBlock = SplitBlock(CurBlock, CurAsyncCall->getNextNode(), this); + // The block where we store the context and return + BasicBlock *SaveAsyncCtxBlock = BasicBlock::Create(TheModule->getContext(), "SaveAsyncCtx", &F, AfterCallBlock); + // return a dummy value at the end, to make the block valid + new UnreachableInst(TheModule->getContext(), SaveAsyncCtxBlock); + + // allocate the context before making the call + // we don't know the size yet, will fix it later + // we cannot insert the 
instruction later because, + // we need to make sure that all the instructions and blocks are fixed before we can generate DT and find context variables + // In CallHandler.h `sp` will be put as the second parameter + // such that we can take a note of the original sp + CallInst *AllocAsyncCtxInst = CallInst::Create(AllocAsyncCtxFunction, Constant::getNullValue(I32), "AsyncCtx", CurAsyncCall); + + // Right after the call + // check async and return if so + // TODO: we can define truly async functions and partial async functions + { + // remove old terminator, which came from SplitBlock + CurBlock->getTerminator()->eraseFromParent(); + // go to SaveAsyncCtxBlock if the previous call is async + // otherwise just continue to AfterCallBlock + CallInst *CheckAsync = CallInst::Create(CheckAsyncFunction, "IsAsync", CurBlock); + BranchInst::Create(SaveAsyncCtxBlock, AfterCallBlock, CheckAsync, CurBlock); + } + + // take a note of this async call + AsyncCallEntry CurAsyncCallEntry; + CurAsyncCallEntry.AsyncCallInst = CurAsyncCall; + CurAsyncCallEntry.AfterCallBlock = AfterCallBlock; + CurAsyncCallEntry.AllocAsyncCtxInst = AllocAsyncCtxInst; + CurAsyncCallEntry.SaveAsyncCtxBlock = SaveAsyncCtxBlock; + // create an empty function for the callback, which will be constructed later + CurAsyncCallEntry.CallbackFunc = Function::Create(CallbackFunctionType, F.getLinkage(), F.getName() + "__async_cb", TheModule); + AsyncCallEntries.push_back(CurAsyncCallEntry); + } + + + // Pass 2 + // analyze the context variables and construct SaveAsyncCtxBlock for each async call + // also calculate the size of the context and allocate the async context accordingly + for (std::vector::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { + AsyncCallEntry & CurEntry = *EI; + + // Collect everything to be saved + FindContextVariables(CurEntry); + + // Pack the variables as a struct + { + // TODO: sort them from large memeber to small ones, in order to make the 
struct compact even when aligned + SmallVector Types; + Types.push_back(CallbackFunctionType->getPointerTo()); + for (Values::iterator VI = CurEntry.ContextVariables.begin(), VE = CurEntry.ContextVariables.end(); VI != VE; ++VI) { + Types.push_back((*VI)->getType()); + } + CurEntry.ContextStructType = StructType::get(TheModule->getContext(), Types); + } + + // fix the size of allocation + CurEntry.AllocAsyncCtxInst->setOperand(0, + ConstantInt::get(I32, DL->getTypeStoreSize(CurEntry.ContextStructType))); + + // construct SaveAsyncCtxBlock + { + // fill in SaveAsyncCtxBlock + // temporarily remove the terminator for convenience + CurEntry.SaveAsyncCtxBlock->getTerminator()->eraseFromParent(); + assert(CurEntry.SaveAsyncCtxBlock->empty()); + + BitCastInst *AsyncCtxAddr = new BitCastInst(CurEntry.AllocAsyncCtxInst, CurEntry.ContextStructType->getPointerTo(), "AsyncCtxAddr", CurEntry.SaveAsyncCtxBlock); + SmallVector Indices; + // store the callback + { + Indices.push_back(ConstantInt::get(I32, 0)); + Indices.push_back(ConstantInt::get(I32, 0)); + GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock); + new StoreInst(CurEntry.CallbackFunc, AsyncVarAddr, CurEntry.SaveAsyncCtxBlock); + } + // store the context variables + for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) { + Indices.clear(); + Indices.push_back(ConstantInt::get(I32, 0)); + Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element is the callback function + GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock); + new StoreInst(CurEntry.ContextVariables[i], AsyncVarAddr, CurEntry.SaveAsyncCtxBlock); + } + // to exit the block, we want to return without unwinding the stack frame + CallInst::Create(DoNotUnwindFunction, "", CurEntry.SaveAsyncCtxBlock); + ReturnInst::Create(TheModule->getContext(), + (F.getReturnType()->isVoidTy() ? 
0 : Constant::getNullValue(F.getReturnType())), + CurEntry.SaveAsyncCtxBlock); + } + } + + // Pass 3 + // now all the SaveAsyncCtxBlock's have been constructed + // we can clone F and construct callback functions + // we could not construct the callbacks in Pass 2 because we need _all_ those SaveAsyncCtxBlock's appear in _each_ callback + for (std::vector::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { + AsyncCallEntry & CurEntry = *EI; + + Function *CurCallbackFunc = CurEntry.CallbackFunc; + ValueToValueMapTy VMap; + + // Add the entry block + // load variables from the context + // also update VMap for CloneFunction + BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "AsyncCallbackEntry", CurCallbackFunc); + std::vector LoadedAsyncVars; + { + BitCastInst *AsyncCtxAddr = new BitCastInst(CurCallbackFunc->arg_begin(), CurEntry.ContextStructType->getPointerTo(), "AsyncCtx", EntryBlock); + SmallVector Indices; + for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) { + Indices.clear(); + Indices.push_back(ConstantInt::get(I32, 0)); + Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element of AsyncCtx is the callback function + GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddr, Indices, "", EntryBlock); + LoadedAsyncVars.push_back(new LoadInst(AsyncVarAddr, "", EntryBlock)); + // we want the argument to be replaced by the loaded value + if (isa(CurEntry.ContextVariables[i])) + VMap[CurEntry.ContextVariables[i]] = LoadedAsyncVars.back(); + } + } + + // we don't need any argument, just leave dummy entries there to cheat CloneFunctionInto + for (Function::const_arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) { + if (VMap.count(AI) == 0) + VMap[AI] = Constant::getNullValue(AI->getType()); + } + + // Clone the function + { + SmallVector Returns; + CloneFunctionInto(CurCallbackFunc, &F, VMap, false, Returns); + + // return type of the callback functions is 
always void + // need to fix the return type + if (!F.getReturnType()->isVoidTy()) { + // for those return instructions that are from the original function + // it means we are 'truly' leaving this function + // need to store the return value right before return + for (size_t i = 0; i < OrigReturns.size(); ++i) { + ReturnInst *RI = cast(VMap[OrigReturns[i]]); + // Need to store the return value into the global area + CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", RI); + BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, F.getReturnType()->getPointerTo(), "AsyncRetValAddr", RI); + new StoreInst(RI->getOperand(0), RetValAddr, RI); + } + // we want to unwind the stack back to where it was before the original function was called + // but we don't actually need to do this here + // at this point it must be true that no callback is pending + // so the scheduler will correct the stack pointer and pop the frame + // here we just fix the return type + for (size_t i = 0; i < Returns.size(); ++i) { + ReplaceInstWithInst(Returns[i], ReturnInst::Create(TheModule->getContext())); + } + } + } + + // the callback function does not have any return value + // so clear all the attributes for return + { + AttributeSet Attrs = CurCallbackFunc->getAttributes(); + CurCallbackFunc->setAttributes( + Attrs.removeAttributes(TheModule->getContext(), AttributeSet::ReturnIndex, Attrs.getRetAttributes()) + ); + } + + // in the callback function, we never allocate a new async frame + // instead we reuse the existing one + for (std::vector::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { + Instruction *I = cast(VMap[EI->AllocAsyncCtxInst]); + ReplaceInstWithInst(I, CallInst::Create(ReallocAsyncCtxFunction, I->getOperand(0), "ReallocAsyncCtx")); + } + + // mapped entry point & async call + BasicBlock *ResumeBlock = cast(VMap[CurEntry.AfterCallBlock]); + Instruction *MappedAsyncCall = cast(VMap[CurEntry.AsyncCallInst]); + + 
// To save space, for each async call in the callback function, we just ignore the sync case, and leave it to the scheduler + // TODO need an option for this + { + for (std::vector::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { + AsyncCallEntry & CurEntry = *EI; + Instruction *MappedAsyncCallInst = cast(VMap[CurEntry.AsyncCallInst]); + BasicBlock *MappedAsyncCallBlock = MappedAsyncCallInst->getParent(); + BasicBlock *MappedAfterCallBlock = cast(VMap[CurEntry.AfterCallBlock]); + + // for the sync case of the call, go to NewBlock (instead of MappedAfterCallBlock) + BasicBlock *NewBlock = BasicBlock::Create(TheModule->getContext(), "", CurCallbackFunc, MappedAfterCallBlock); + MappedAsyncCallBlock->getTerminator()->setSuccessor(1, NewBlock); + // store the return value + if (!MappedAsyncCallInst->use_empty()) { + CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", NewBlock); + BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCallInst->getType()->getPointerTo(), "AsyncRetValAddr", NewBlock); + new StoreInst(MappedAsyncCallInst, RetValAddr, NewBlock); + } + // tell the scheduler that we want to keep the current async stack frame + CallInst::Create(DoNotUnwindAsyncFunction, "", NewBlock); + // finally we go to the SaveAsyncCtxBlock, to register the callback, save the local variables and leave + BasicBlock *MappedSaveAsyncCtxBlock = cast(VMap[CurEntry.SaveAsyncCtxBlock]); + BranchInst::Create(MappedSaveAsyncCtxBlock, NewBlock); + } + } + + std::vector ToPromote; + // applying loaded variables in the entry block + { + BasicBlockSet ReachableBlocks = FindReachableBlocksFrom(ResumeBlock); + for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) { + Value *OrigVar = CurEntry.ContextVariables[i]; + if (isa(OrigVar)) continue; // already processed + Value *CurVar = VMap[OrigVar]; + assert(CurVar != MappedAsyncCall); + if (Instruction *Inst = dyn_cast(CurVar)) { + if 
(ReachableBlocks.count(Inst->getParent())) { + // Inst could be either defined or loaded from the async context + // Do the dirty works in memory + // TODO: might need to check the safety first + // TODO: can we create phi directly? + AllocaInst *Addr = DemoteRegToStack(*Inst, false); + new StoreInst(LoadedAsyncVars[i], Addr, EntryBlock); + ToPromote.push_back(Addr); + } else { + // The parent block is not reachable, which means there is no confliction + // it's safe to replace Inst with the loaded value + assert(Inst != LoadedAsyncVars[i]); // this should only happen when OrigVar is an Argument + Inst->replaceAllUsesWith(LoadedAsyncVars[i]); + } + } + } + } + + // resolve the return value of the previous async function + // it could be the value just loaded from the global area + // or directly returned by the function (in its sync case) + if (!CurEntry.AsyncCallInst->use_empty()) { + // load the async return value + CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", EntryBlock); + BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCall->getType()->getPointerTo(), "AsyncRetValAddr", EntryBlock); + LoadInst *RetVal = new LoadInst(RetValAddr, "AsyncRetVal", EntryBlock); + + AllocaInst *Addr = DemoteRegToStack(*MappedAsyncCall, false); + new StoreInst(RetVal, Addr, EntryBlock); + ToPromote.push_back(Addr); + } + + // TODO remove unreachable blocks before creating phi + + // We go right to ResumeBlock from the EntryBlock + BranchInst::Create(ResumeBlock, EntryBlock); + + /* + * Creating phi's + * Normal stack frames and async stack frames are interleaving with each other. + * In a callback function, if we call an async function, we might need to realloc the async ctx. + * at this point we don't want anything stored after the ctx, + * such that we can free and extend the ctx by simply update STACKTOP. + * Therefore we don't want any alloca's in callback functions. 
+ * + * TODO: can we create phi directly, without using Demote/Promote ? + */ + if (!ToPromote.empty()) { + DominatorTree DT; + DT.runOnFunction(*CurCallbackFunc); + PromoteMemToReg(ToPromote, DT); + } + } + + // Pass 4 + // Here are modifications to the original function, which we won't want to be cloned into the callback functions + for (std::vector::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { + AsyncCallEntry & CurEntry = *EI; + // remove the frame if no async function has been called + CallInst::Create(FreeAsyncCtxFunction, CurEntry.AllocAsyncCtxInst, "", CurEntry.AfterCallBlock->getFirstNonPHI()); + } +} + +bool LowerEmAsyncify::IsFunctionPointerCall(const Instruction *I) { + // mostly from CallHandler.h + ImmutableCallSite CS(I); + if (!CS) return false; // not call nor invoke + const Value *CV = CS.getCalledValue()->stripPointerCasts(); + return !isa(CV); +} + +ModulePass *llvm::createLowerEmAsyncifyPass() { + return new LowerEmAsyncify(); +} + diff --git a/lib/Transforms/NaCl/PNaClABISimplify.cpp b/lib/Transforms/NaCl/PNaClABISimplify.cpp index 886e0cc4cce..50939495388 100644 --- a/lib/Transforms/NaCl/PNaClABISimplify.cpp +++ b/lib/Transforms/NaCl/PNaClABISimplify.cpp @@ -32,6 +32,12 @@ EnableEmCxxExceptions("enable-emscripten-cxx-exceptions", cl::desc("Enables C++ exceptions in emscripten"), cl::init(false)); +static cl::opt // XXX EMSCRIPTEN +EnableEmAsyncify("emscripten-asyncify", + cl::desc("Enable asyncify transformation (see emscripten ASYNCIFY option)"), + cl::init(false)); + + void llvm::PNaClABISimplifyAddPreOptPasses(PassManager &PM) { if (EnableSjLjEH) { // This comes before ExpandTls because it introduces references to @@ -88,6 +94,9 @@ void llvm::PNaClABISimplifyAddPreOptPasses(PassManager &PM) { #if 0 // XXX EMSCRIPTEN: We don't currently have tls, and we don't have the same complications with extern_weak PM.add(createGlobalCleanupPass()); #endif + + if (EnableEmAsyncify) // XXX EMSCRIPTEN + 
PM.add(createLowerEmAsyncifyPass()); } void llvm::PNaClABISimplifyAddPostOptPasses(PassManager &PM) {