diff --git a/emscripten-version.txt b/emscripten-version.txt index b7825b8d767..a5096ba657e 100644 --- a/emscripten-version.txt +++ b/emscripten-version.txt @@ -1,2 +1,2 @@ -1.28.2 +1.28.3 diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 30325f66bf2..19f18df659c 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -298,6 +298,7 @@ void initializeRewritePNaClLibraryCallsPass(PassRegistry&); void initializeStripAttributesPass(PassRegistry&); void initializeStripMetadataPass(PassRegistry&); void initializeExpandI64Pass(PassRegistry&); // XXX EMSCRIPTEN +void initializeExpandInsertExtractElementPass(PassRegistry&); // XXX EMSCRIPTEN void initializeLowerEmExceptionsPass(PassRegistry&); // XXX EMSCRIPTEN void initializeLowerEmSetjmpPass(PassRegistry&); // XXX EMSCRIPTEN void initializeLowerEmAsyncifyPass(PassRegistry&); // XXX EMSCRIPTEN diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h index d3b4e8bc64d..22b95cae18c 100644 --- a/include/llvm/Transforms/NaCl.h +++ b/include/llvm/Transforms/NaCl.h @@ -53,6 +53,7 @@ ModulePass *createStripAttributesPass(); ModulePass *createStripMetadataPass(); ModulePass *createExpandI64Pass(); // XXX EMSCRIPTEN +ModulePass *createExpandInsertExtractElementPass(); // XXX EMSCRIPTEN ModulePass *createLowerEmExceptionsPass(); // XXX EMSCRIPTEN ModulePass *createLowerEmSetjmpPass(); // XXX EMSCRIPTEN ModulePass *createNoExitRuntimePass(); // XXX EMSCRIPTEN diff --git a/lib/Target/JSBackend/CMakeLists.txt b/lib/Target/JSBackend/CMakeLists.txt index ac04b0a2a71..c1194467f4b 100644 --- a/lib/Target/JSBackend/CMakeLists.txt +++ b/lib/Target/JSBackend/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_target(JSBackendCodeGen AllocaManager.cpp ExpandI64.cpp + ExpandInsertExtractElement.cpp JSBackend.cpp JSTargetMachine.cpp JSTargetTransformInfo.cpp diff --git a/lib/Target/JSBackend/CallHandlers.h b/lib/Target/JSBackend/CallHandlers.h index 0f2cbabf20d..f3991dad1ea 
100644 --- a/lib/Target/JSBackend/CallHandlers.h +++ b/lib/Target/JSBackend/CallHandlers.h @@ -454,8 +454,7 @@ DEF_CALL_HANDLER(llvm_cttz_i32, { // vector ops DEF_CALL_HANDLER(emscripten_float32x4_signmask, { - // TODO: use signMaskPolyfill explicitly for now, until the builtin signMask is ready. - return getAssign(CI) + getValueAsStr(CI->getOperand(0)) + ".signMaskPolyfill"; + return getAssign(CI) + getValueAsStr(CI->getOperand(0)) + ".signMask"; }) #define DEF_BUILTIN_HANDLER(name, to) \ diff --git a/lib/Target/JSBackend/ExpandI64.cpp b/lib/Target/JSBackend/ExpandI64.cpp index 168e66063b3..f40ac691dcc 100644 --- a/lib/Target/JSBackend/ExpandI64.cpp +++ b/lib/Target/JSBackend/ExpandI64.cpp @@ -815,6 +815,16 @@ bool ExpandI64::splitInst(Instruction *I) { Chunks.push_back(L); Chunks.push_back(H); break; + } else if (isa(I->getOperand(0)->getType()) && !isa(I->getType())) { + unsigned NumElts = getNumChunks(I->getType()); + VectorType *IVTy = VectorType::get(i32, NumElts); + Instruction *B = CopyDebug(new BitCastInst(I->getOperand(0), IVTy, "", I), I); + for (unsigned i = 0; i < NumElts; ++i) { + Constant *Idx = ConstantInt::get(i32, i); + Instruction *Ext = CopyDebug(ExtractElementInst::Create(B, Idx, "", I), I); + Chunks.push_back(Ext); + } + break; } else { // no-op bitcast assert(I->getType() == I->getOperand(0)->getType()); @@ -1129,6 +1139,7 @@ bool ExpandI64::runOnModule(Module &M) { Phi->addIncoming(Phi->getIncomingValue(Index), Change.NewBB); } } + PhiBlockChanges.clear(); // We only visited blocks found by a DFS walk from the entry, so we haven't // visited any unreachable blocks, and they may still contain illegal diff --git a/lib/Target/JSBackend/ExpandInsertExtractElement.cpp b/lib/Target/JSBackend/ExpandInsertExtractElement.cpp new file mode 100644 index 00000000000..fbc7b8667aa --- /dev/null +++ b/lib/Target/JSBackend/ExpandInsertExtractElement.cpp @@ -0,0 +1,106 @@ +//==- ExpandInsertExtractElement.cpp - Expand vector insert and extract -=// +// +// 
The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===------------------------------------------------------------------===// +// +// This pass expands insertelement and extractelement instructions with +// variable indices, which SIMD.js doesn't natively support yet. +// +//===------------------------------------------------------------------===// + +#include "OptPasses.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Transforms/Utils/Local.h" +#include +#include + +#include "llvm/Support/raw_ostream.h" + +#ifdef NDEBUG +#undef assert +#define assert(x) { if (!(x)) report_fatal_error(#x); } +#endif + +using namespace llvm; + +namespace { + + class ExpandInsertExtractElement : public FunctionPass { + bool Changed; + + public: + static char ID; + ExpandInsertExtractElement() : FunctionPass(ID) { + initializeExpandInsertExtractElementPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + }; +} + +char ExpandInsertExtractElement::ID = 0; +INITIALIZE_PASS(ExpandInsertExtractElement, "expand-insert-extract-elements", + "Expand and lower insert and extract element operations", + false, false) + +// Utilities + +static Instruction *CopyDebug(Instruction *NewInst, Instruction *Original) { + NewInst->setDebugLoc(Original->getDebugLoc()); + return NewInst; +} +// Lower variable-index insert/extractelement via a stack slot; returns true if F changed. +bool ExpandInsertExtractElement::runOnFunction(Function &F) { + Changed = false; + + BasicBlock &EntryBB = F.getEntryBlock(); // allocas go before its current first instruction, which is re-read on each use because the original first instruction may itself be erased below + Type *Int32 = Type::getInt32Ty(F.getContext()); + Constant *Zero = ConstantInt::get(Int32, 0); + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + + if (InsertElementInst *III = dyn_cast(Inst)) { + if 
(isa(III->getOperand(2))) + continue; + Changed = true; // a variable-index insert is about to be rewritten + Instruction *A = new AllocaInst(III->getType(), 0, "", &EntryBB.front()); + CopyDebug(new StoreInst(III->getOperand(0), A, III), III); + + Value *Idxs[] = { Zero, III->getOperand(2) }; + Instruction *B = CopyDebug(GetElementPtrInst::Create(A, Idxs, "", III), III); + CopyDebug(new StoreInst(III->getOperand(1), B, III), III); + + Instruction *L = CopyDebug(new LoadInst(A, "", III), III); + III->replaceAllUsesWith(L); + III->eraseFromParent(); + } else if (ExtractElementInst *EII = dyn_cast(Inst)) { + if (isa(EII->getOperand(1))) + continue; + Changed = true; // a variable-index extract is about to be rewritten + Instruction *A = new AllocaInst(EII->getOperand(0)->getType(), 0, "", &EntryBB.front()); + CopyDebug(new StoreInst(EII->getOperand(0), A, EII), EII); + + Value *Idxs[] = { Zero, EII->getOperand(1) }; + Instruction *B = CopyDebug(GetElementPtrInst::Create(A, Idxs, "", EII), EII); + Instruction *L = CopyDebug(new LoadInst(B, "", EII), EII); + EII->replaceAllUsesWith(L); + EII->eraseFromParent(); + } + } + + return Changed; +} + +Pass *llvm::createExpandInsertExtractElementPass() { + return new ExpandInsertExtractElement(); +} diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp index 4b8da31df04..8a71ff54143 100644 --- a/lib/Target/JSBackend/JSBackend.cpp +++ b/lib/Target/JSBackend/JSBackend.cpp @@ -386,7 +386,8 @@ namespace { // of the compare that produced them. 
assert(VT->getElementType()->getPrimitiveSizeInBits() == 32 || VT->getElementType()->getPrimitiveSizeInBits() == 1); - assert(VT->getNumElements() == 4); + assert(VT->getBitWidth() <= 128); + assert(VT->getNumElements() <= 4); UsesSIMD = true; } @@ -455,6 +456,7 @@ namespace { void generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw_string_ostream& Code); void generateICmpExpression(const ICmpInst *I, raw_string_ostream& Code); void generateFCmpExpression(const FCmpInst *I, raw_string_ostream& Code); + void generateShiftExpression(const BinaryOperator *I, raw_string_ostream& Code); void generateUnrolledExpression(const User *I, raw_string_ostream& Code); bool generateSIMDExpression(const User *I, raw_string_ostream& Code); void generateExpression(const User *I, raw_string_ostream& Code); @@ -1073,18 +1075,23 @@ std::string JSWriter::getConstant(const Constant* CV, AsmCast sign) { return "0"; } } else if (const ConstantDataVector *DV = dyn_cast(CV)) { - return getConstantVector(cast(CV->getType())->getElementType(), - getConstant(DV->getElementAsConstant(0)), - getConstant(DV->getElementAsConstant(1)), - getConstant(DV->getElementAsConstant(2)), - getConstant(DV->getElementAsConstant(3))); + unsigned NumElts = cast(DV->getType())->getNumElements(); + Type *EltTy = cast(DV->getType())->getElementType(); + Constant *Undef = UndefValue::get(EltTy); + return getConstantVector(EltTy, + getConstant(NumElts > 0 ? DV->getElementAsConstant(0) : Undef), + getConstant(NumElts > 1 ? DV->getElementAsConstant(1) : Undef), + getConstant(NumElts > 2 ? DV->getElementAsConstant(2) : Undef), + getConstant(NumElts > 3 ? 
DV->getElementAsConstant(3) : Undef)); } else if (const ConstantVector *V = dyn_cast(CV)) { - assert(V->getNumOperands() == 4); + unsigned NumElts = cast(CV->getType())->getNumElements(); + Type *EltTy = cast(CV->getType())->getElementType(); + Constant *Undef = UndefValue::get(EltTy); return getConstantVector(cast(V->getType())->getElementType(), - getConstant(V->getOperand(0)), - getConstant(V->getOperand(1)), - getConstant(V->getOperand(2)), - getConstant(V->getOperand(3))); + getConstant(NumElts > 0 ? V->getOperand(0) : Undef), + getConstant(NumElts > 1 ? V->getOperand(1) : Undef), + getConstant(NumElts > 2 ? V->getOperand(2) : Undef), + getConstant(NumElts > 3 ? V->getOperand(3) : Undef)); } else if (const ConstantArray *CA = dyn_cast(CV)) { // handle things like [i8* bitcast (<{ i32, i32, i32 }>* @_ZTISt9bad_alloc to i8*)] which clang can emit for landingpads assert(CA->getNumOperands() == 1); @@ -1299,20 +1306,22 @@ void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw // Check whether can generate SIMD.js swizzle or shuffle. std::string A = getValueAsStr(SVI->getOperand(0)); std::string B = getValueAsStr(SVI->getOperand(1)); - int Mask0 = SVI->getMaskValue(0); - int Mask1 = SVI->getMaskValue(1); - int Mask2 = SVI->getMaskValue(2); - int Mask3 = SVI->getMaskValue(3); + int OpNumElements = cast(SVI->getOperand(0)->getType())->getNumElements(); + int ResultNumElements = SVI->getType()->getNumElements(); + int Mask0 = ResultNumElements > 0 ? SVI->getMaskValue(0) : -1; + int Mask1 = ResultNumElements > 1 ? SVI->getMaskValue(1) : -1; + int Mask2 = ResultNumElements > 2 ? SVI->getMaskValue(2) : -1; + int Mask3 = ResultNumElements > 3 ? 
SVI->getMaskValue(3) : -1; bool swizzleA = false; bool swizzleB = false; - if ((Mask0 < 4) && (Mask1 < 4) && - (Mask2 < 4) && (Mask3 < 4)) { + if ((Mask0 < OpNumElements) && (Mask1 < OpNumElements) && + (Mask2 < OpNumElements) && (Mask3 < OpNumElements)) { swizzleA = true; } - if ((Mask0 < 0 || (Mask0 >= 4 && Mask0 < 8)) && - (Mask1 < 0 || (Mask1 >= 4 && Mask1 < 8)) && - (Mask2 < 0 || (Mask2 >= 4 && Mask2 < 8)) && - (Mask3 < 0 || (Mask3 >= 4 && Mask3 < 8))) { + if ((Mask0 < 0 || (Mask0 >= OpNumElements && Mask0 < OpNumElements * 2)) && + (Mask1 < 0 || (Mask1 >= OpNumElements && Mask1 < OpNumElements * 2)) && + (Mask2 < 0 || (Mask2 >= OpNumElements && Mask2 < OpNumElements * 2)) && + (Mask3 < 0 || (Mask3 >= OpNumElements && Mask3 < OpNumElements * 2))) { swizzleB = true; } assert(!(swizzleA && swizzleB)); @@ -1323,18 +1332,22 @@ void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw } else { Code << "SIMD_float32x4_swizzle(" << T; } - for (unsigned int i = 0; i < 4; i++) { + int i = 0; + for (; i < ResultNumElements; ++i) { Code << ", "; int Mask = SVI->getMaskValue(i); if (Mask < 0) { Code << 0; - } else if (Mask < 4) { + } else if (Mask < OpNumElements) { Code << Mask; } else { - assert(Mask < 8); - Code << (Mask-4); + assert(Mask < OpNumElements * 2); + Code << (Mask-OpNumElements); } } + for (; i < 4; ++i) { + Code << ", 0"; + } Code << ")"; return; } @@ -1353,7 +1366,13 @@ void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw for (unsigned int i = 0; i < Indices.size(); ++i) { if (i != 0) Code << ", "; - Code << Indices[i]; + int Mask = Indices[i]; + if (Mask >= OpNumElements) + Mask = Mask - OpNumElements + 4; + if (Mask < 0) + Code << 0; + else + Code << Mask; } Code << ")"; @@ -1449,9 +1468,61 @@ void JSWriter::generateFCmpExpression(const FCmpInst *I, raw_string_ostream& Cod Code << ")"; } +static const Value *getElement(const Value *V, unsigned i) { + if (const InsertElementInst *II = dyn_cast(V)) { + if 
(ConstantInt *CI = dyn_cast(II->getOperand(2))) { + if (CI->equalsInt(i)) + return II->getOperand(1); + } + return getElement(II->getOperand(0), i); + } + return NULL; +} + +static const Value *getSplatValue(const Value *V) { + if (const Constant *C = dyn_cast(V)) + return C->getSplatValue(); + + VectorType *VTy = cast(V->getType()); + const Value *Result = NULL; + for (unsigned i = 0; i < VTy->getNumElements(); ++i) { + const Value *E = getElement(V, i); + if (!E) + return NULL; + if (!Result) + Result = E; + else if (Result != E) + return NULL; + } + return Result; + +} + +void JSWriter::generateShiftExpression(const BinaryOperator *I, raw_string_ostream& Code) { + // If we're shifting every lane by the same amount (shifting by a splat value), + // then we can use a ByScalar shift. + const Value *Count = I->getOperand(1); + if (const Value *Splat = getSplatValue(Count)) { + Code << getAssignIfNeeded(I) << "SIMD_int32x4_"; + if (I->getOpcode() == Instruction::AShr) + Code << "shiftRightArithmeticByScalar"; + else if (I->getOpcode() == Instruction::LShr) + Code << "shiftRightLogicalByScalar"; + else + Code << "shiftLeftByScalar"; + Code << "(" << getValueAsStr(I->getOperand(0)) << ", " << getValueAsStr(Splat) << ")"; + return; + } + + // SIMD.js does not currently have vector-vector shifts. 
+ generateUnrolledExpression(I, Code); +} + void JSWriter::generateUnrolledExpression(const User *I, raw_string_ostream& Code) { VectorType *VT = cast(I->getType()); + Code << getAssignIfNeeded(I); + if (VT->getElementType()->isIntegerTy()) { Code << "SIMD_int32x4("; } else { @@ -1481,6 +1552,18 @@ void JSWriter::generateUnrolledExpression(const User *I, raw_string_ostream& Cod Code << "(" << getValueAsStr(I->getOperand(0)) << Lane << ">>>0) / (" << getValueAsStr(I->getOperand(1)) << Lane << ">>>0)>>>0"; break; + case Instruction::AShr: + Code << "(" << getValueAsStr(I->getOperand(0)) << Lane << "|0) >> (" + << getValueAsStr(I->getOperand(1)) << Lane << "|0)|0"; + break; + case Instruction::LShr: + Code << "(" << getValueAsStr(I->getOperand(0)) << Lane << "|0) >>> (" + << getValueAsStr(I->getOperand(1)) << Lane << "|0)|0"; + break; + case Instruction::Shl: + Code << "(" << getValueAsStr(I->getOperand(0)) << Lane << "|0) << (" + << getValueAsStr(I->getOperand(1)) << Lane << "|0)|0"; + break; default: I->dump(); error("invalid unrolled vector instr"); break; } } @@ -1514,8 +1597,17 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) { Code << getAssignIfNeeded(I) << getValueAsStr(I->getOperand(0)); break; case Instruction::Select: - assert(I->getOperand(0)->getType()->isIntegerTy(1) && "vector-of-i1 select not yet supported"); - // select arms are SIMD values, no special handling + // Since we represent vectors of i1 as vectors of sign extended wider integers, + // selecting on them is just an elementwise select. 
+ if (isa(I->getOperand(0)->getType())) { + if (cast(I->getType())->getElementType()->isIntegerTy()) { + Code << getAssignIfNeeded(I) << "SIMD_int32x4_select(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << "," << getValueAsStr(I->getOperand(2)) << ")"; break; + } else { + Code << getAssignIfNeeded(I) << "SIMD_float32x4_select(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << "," << getValueAsStr(I->getOperand(2)) << ")"; break; + } + return true; + } + // Otherwise we have a scalar condition, so it's a ?: operator. return false; case Instruction::FAdd: Code << getAssignIfNeeded(I) << "SIMD_float32x4_add(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break; case Instruction::FMul: Code << getAssignIfNeeded(I) << "SIMD_float32x4_mul(" << getValueAsStr(I->getOperand(0)) << "," << getValueAsStr(I->getOperand(1)) << ")"; break; @@ -1556,11 +1648,20 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) { const LoadInst *LI = cast(I); const Value *P = LI->getPointerOperand(); std::string PS = getValueAsStr(P); + + // Determine if this is a partial load. + static const std::string partialAccess[4] = { "X", "XY", "XYZ", "" }; + if (VT->getNumElements() < 1 || VT->getNumElements() > 4) { + error("invalid number of lanes in SIMD operation!"); + break; + } + const std::string &Part = partialAccess[VT->getNumElements() - 1]; + Code << getAssignIfNeeded(I); if (VT->getElementType()->isIntegerTy()) { - Code << "SIMD_int32x4_load(HEAPU8, " << PS << ")"; + Code << "SIMD_int32x4_load" << Part << "(HEAPU8, " << PS << ")"; } else { - Code << "SIMD_float32x4_load(HEAPU8, " << PS << ")"; + Code << "SIMD_float32x4_load" << Part << "(HEAPU8, " << PS << ")"; } break; } @@ -1580,6 +1681,11 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) { // doesn't support them). 
generateUnrolledExpression(I, Code); break; + case Instruction::AShr: + case Instruction::LShr: + case Instruction::Shl: + generateShiftExpression(cast(I), Code); + break; } return true; } else { @@ -1591,10 +1697,19 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) { std::string PS = getOpName(P); std::string VS = getValueAsStr(SI->getValueOperand()); Code << getAdHocAssign(PS, P->getType()) << getValueAsStr(P) << ';'; + + // Determine if this is a partial store. + static const std::string partialAccess[4] = { "X", "XY", "XYZ", "" }; + if (VT->getNumElements() < 1 || VT->getNumElements() > 4) { + error("invalid number of lanes in SIMD operation!"); + return false; + } + const std::string &Part = partialAccess[VT->getNumElements() - 1]; + if (VT->getElementType()->isIntegerTy()) { - Code << "SIMD_int32x4_store(HEAPU8, " << PS << ", " << VS << ")"; + Code << "SIMD_int32x4_store" << Part << "(HEAPU8, " << PS << ", " << VS << ")"; } else { - Code << "SIMD_float32x4_store(HEAPU8, " << PS << ", " << VS << ")"; + Code << "SIMD_float32x4_store" << Part << "(HEAPU8, " << PS << ", " << VS << ")"; } return true; } else if (Operator::getOpcode(I) == Instruction::ExtractElement) { @@ -2250,7 +2365,7 @@ void JSWriter::printFunctionBody(const Function *F) { if (!LastCurly) LastCurly = buffer; char *FinalReturn = strstr(LastCurly, "return "); if (!FinalReturn) { - Out << " return " << getCast("0", RT, ASM_NONSPECIFIC) << ";\n"; + Out << " return " << getParenCast(getConstant(UndefValue::get(RT)), RT, ASM_NONSPECIFIC) << ";\n"; } } } @@ -2789,6 +2904,7 @@ bool JSTargetMachine::addPassesToEmitFile(PassManagerBase &PM, AnalysisID StopAfter) { assert(FileType == TargetMachine::CGFT_AssemblyFile); + PM.add(createExpandInsertExtractElementPass()); PM.add(createExpandI64Pass()); CodeGenOpt::Level OptLevel = getOptLevel(); diff --git a/lib/Target/JSBackend/JSTargetTransformInfo.cpp b/lib/Target/JSBackend/JSTargetTransformInfo.cpp index 
74c2201d54e..dcb92b4d643 100644 --- a/lib/Target/JSBackend/JSTargetTransformInfo.cpp +++ b/lib/Target/JSBackend/JSTargetTransformInfo.cpp @@ -68,6 +68,15 @@ class JSTTI : public ImmutablePass, public TargetTransformInfo { virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; virtual unsigned getRegisterBitWidth(bool Vector) const; + + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind Opd1Info = OK_AnyValue, + OperandValueKind Opd2Info = OK_AnyValue) const; + + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index = -1) const; + + virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; }; } // end anonymous namespace @@ -101,3 +110,56 @@ unsigned JSTTI::getRegisterBitWidth(bool Vector) const { return 32; } + +unsigned JSTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind Opd1Info, + OperandValueKind Opd2Info) const { + const unsigned Nope = 65536; + + unsigned Cost = TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info); + + if (VectorType *VTy = dyn_cast(Ty)) { + switch (VTy->getNumElements()) { + case 4: + // SIMD.js supports int32x4 and float32x4, and we can emulate <4 x i1>. + if (!VTy->getElementType()->isIntegerTy(1) && + !VTy->getElementType()->isIntegerTy(32) && + !VTy->getElementType()->isFloatTy()) + { + return Nope; + } + break; + default: + // Wait until the other types are optimized. + return Nope; + } + + switch (Opcode) { + case Instruction::LShr: + case Instruction::AShr: + case Instruction::Shl: + // SIMD.js' shifts are currently only ByScalar. 
+ if (Opd2Info != OK_UniformValue && Opd2Info != OK_UniformConstantValue) + Cost = Cost * VTy->getNumElements() + 100; + break; + } + } + + return Cost; +} + +unsigned JSTTI::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { + unsigned Cost = TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); + + // SIMD.js' insert/extract currently only take constant indices. + if (Index == -1u) + return Cost + 100; + + return Cost; +} + +void JSTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { + // We generally don't want a lot of unrolling. + UP.Partial = false; + UP.Runtime = false; +} diff --git a/lib/Target/JSBackend/OptPasses.h b/lib/Target/JSBackend/OptPasses.h index 2f90b568b01..5e236569f9e 100644 --- a/lib/Target/JSBackend/OptPasses.h +++ b/lib/Target/JSBackend/OptPasses.h @@ -17,6 +17,7 @@ namespace llvm { extern FunctionPass *createSimplifyAllocasPass(); extern Pass *createExpandI64Pass(); + extern Pass *createExpandInsertExtractElementPass(); } // End llvm namespace diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 1e724106991..179d52f0329 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1027,7 +1027,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // If the result mask is equal to one of the original shuffle masks, // or is a splat, do the replacement. - if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { + // + // XXX EMSCRIPTEN: Add '|| true' so that we always do the replacement. + // We're targeting SIMD.js, so there's less of an expectation that a + // particular shuffle mask will always map onto a particular instruction on + // a particular ISA because we aren't targeting a particular ISA (what the + // JS engine does is another story). 
We may wish to re-evaluate this choice + // as we move on to higher-element-count vectors, but especially for now this + // is quite desirable. + if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask || + true) + { SmallVector Elts; Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); for (unsigned i = 0, e = newMask.size(); i != e; ++i) { diff --git a/test/CodeGen/JS/blockchanges.ll b/test/CodeGen/JS/blockchanges.ll new file mode 100644 index 00000000000..b93e6688c40 --- /dev/null +++ b/test/CodeGen/JS/blockchanges.ll @@ -0,0 +1,411 @@ +; RUN: llc < %s + +; regression check for emscripten #3088 - we were not clearing BlockChanges in i64 lowering + +; ModuleID = 'waka.bc' +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +%"class.ZenLib::uint128" = type <{ i64, i64 }> + +@.str = private unnamed_addr constant [15 x i8] c"hello, world!\0A\00", align 1 + +@.str368164 = external hidden unnamed_addr constant [10 x i8], align 1 +@.str398167 = external hidden unnamed_addr constant [6 x i8], align 1 +@.str718199 = external hidden unnamed_addr constant [9 x i8], align 1 +@.str738201 = external hidden unnamed_addr constant [21 x i8], align 1 +@.str748202 = external hidden unnamed_addr constant [26 x i8], align 1 +@.str758203 = external hidden unnamed_addr constant [21 x i8], align 1 +@.str768204 = external hidden unnamed_addr constant [8 x i8], align 1 +@.str778205 = external hidden unnamed_addr constant [14 x i8], align 1 +@.str788206 = external hidden unnamed_addr constant [22 x i8], align 1 +@.str798207 = external hidden unnamed_addr constant [25 x i8], align 1 +@.str808208 = external hidden unnamed_addr constant [24 x i8], align 1 +@.str818209 = external hidden unnamed_addr constant [20 x i8], align 1 +@.str828210 = external hidden unnamed_addr constant [34 x i8], align 1 +@.str838211 = external hidden unnamed_addr constant [31 x i8], 
align 1 +@.str848212 = external hidden unnamed_addr constant [29 x i8], align 1 +@.str858213 = external hidden unnamed_addr constant [44 x i8], align 1 +@.str868214 = external hidden unnamed_addr constant [12 x i8], align 1 +@.str908218 = external hidden unnamed_addr constant [21 x i8], align 1 +@.str918219 = external hidden unnamed_addr constant [8 x i8], align 1 +@.str928220 = external hidden unnamed_addr constant [6 x i8], align 1 +@.str9210864 = external hidden unnamed_addr constant [5 x i8], align 1 +@.str514367 = external hidden unnamed_addr constant [5 x i8], align 1 +@.str214409 = external hidden unnamed_addr constant [4 x i8], align 1 +@.str20216493 = external hidden unnamed_addr constant [3 x i8], align 1 +@.str2017231 = external hidden unnamed_addr constant [11 x i8], align 1 +@.str2317234 = external hidden unnamed_addr constant [14 x i8], align 1 +@.str2417235 = external hidden unnamed_addr constant [4 x i8], align 1 +@.str2717238 = external hidden unnamed_addr constant [5 x i8], align 1 +@.str3217243 = external hidden unnamed_addr constant [4 x i8], align 1 +@.str1717689 = external hidden unnamed_addr constant [5 x i8], align 1 +@.str2104 = external hidden unnamed_addr constant [1 x i8], align 1 + +; Function Attrs: nounwind readonly +define hidden i8* @_ZN12MediaInfoLib22Mxf_EssenceCompressionEN6ZenLib7uint128E(%"class.ZenLib::uint128"* nocapture readonly %EssenceCompression) #0 { +entry: + %hi = getelementptr inbounds %"class.ZenLib::uint128"* %EssenceCompression, i32 0, i32 1 + %0 = load i64* %hi, align 1, !tbaa !2 + %and = and i64 %0, -256 + %cmp = icmp eq i64 %and, 436333716306985216 + br i1 %cmp, label %lor.lhs.false, label %return + +lor.lhs.false: ; preds = %entry + %lo = getelementptr inbounds %"class.ZenLib::uint128"* %EssenceCompression, i32 0, i32 0 + %1 = load i64* %lo, align 1, !tbaa !7 + %and1 = and i64 %1, -72057594037927936 + switch i64 %and1, label %return [ + i64 288230376151711744, label %if.end + i64 1008806316530991104, label 
%if.end + ] + +if.end: ; preds = %lor.lhs.false, %lor.lhs.false + %shr = lshr i64 %1, 56 + %conv = trunc i64 %shr to i32 + %and10 = lshr i64 %1, 48 + %and14 = lshr i64 %1, 40 + %and18 = lshr i64 %1, 32 + %conv20 = trunc i64 %and18 to i32 + %and22 = lshr i64 %1, 24 + %and26 = lshr i64 %1, 16 + %conv28 = trunc i64 %and26 to i32 + %and30 = lshr i64 %1, 8 + %conv32 = trunc i64 %and30 to i32 + switch i32 %conv, label %return [ + i32 4, label %sw.bb + i32 14, label %sw.bb112 + ] + +sw.bb: ; preds = %if.end + %conv12 = trunc i64 %and10 to i32 + %conv34 = and i32 %conv12, 255 + switch i32 %conv34, label %return [ + i32 1, label %sw.bb35 + i32 2, label %sw.bb64 + ] + +sw.bb35: ; preds = %sw.bb + %conv36 = and i64 %and14, 255 + %cond12 = icmp eq i64 %conv36, 2 + br i1 %cond12, label %sw.bb37, label %return + +sw.bb37: ; preds = %sw.bb35 + %conv38 = and i32 %conv20, 255 + switch i32 %conv38, label %return [ + i32 1, label %sw.bb39 + i32 2, label %sw.bb42 + ] + +sw.bb39: ; preds = %sw.bb37 + %conv40 = and i64 %and22, 255 + %cond14 = icmp eq i64 %conv40, 1 + %. 
= select i1 %cond14, i8* getelementptr inbounds ([4 x i8]* @.str214409, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0) + br label %return + +sw.bb42: ; preds = %sw.bb37 + %2 = trunc i64 %and22 to i32 + %conv43 = and i32 %2, 255 + switch i32 %conv43, label %sw.default61 [ + i32 1, label %sw.bb44 + i32 2, label %return + i32 3, label %sw.bb56 + i32 113, label %sw.bb60 + ] + +sw.bb44: ; preds = %sw.bb42 + %conv45 = and i32 %conv28, 255 + switch i32 %conv45, label %sw.default54 [ + i32 0, label %return + i32 1, label %return + i32 2, label %return + i32 3, label %return + i32 4, label %return + i32 17, label %return + i32 32, label %sw.bb52 + i32 48, label %sw.bb53 + i32 49, label %sw.bb53 + i32 50, label %sw.bb53 + i32 51, label %sw.bb53 + i32 52, label %sw.bb53 + i32 53, label %sw.bb53 + i32 54, label %sw.bb53 + i32 55, label %sw.bb53 + i32 56, label %sw.bb53 + i32 57, label %sw.bb53 + i32 58, label %sw.bb53 + i32 59, label %sw.bb53 + i32 60, label %sw.bb53 + i32 61, label %sw.bb53 + i32 62, label %sw.bb53 + i32 63, label %sw.bb53 + ] + +sw.bb52: ; preds = %sw.bb44 + br label %return + +sw.bb53: ; preds = %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44 + br label %return + +sw.default54: ; preds = %sw.bb44 + br label %return + +sw.bb56: ; preds = %sw.bb42 + %conv57 = and i64 %and26, 255 + %cond13 = icmp eq i64 %conv57, 1 + %.35 = select i1 %cond13, i8* getelementptr inbounds ([10 x i8]* @.str368164, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0) + br label %return + +sw.bb60: ; preds = %sw.bb42 + br label %return + +sw.default61: ; preds = %sw.bb42 + br label %return + +sw.bb64: ; preds = %sw.bb + %conv65 = and i64 %and14, 255 + %cond9 = icmp eq i64 %conv65, 2 + br i1 %cond9, label %sw.bb66, label %return + +sw.bb66: ; preds = %sw.bb64 + %conv67 = and i32 %conv20, 255 + switch i32 %conv67, label 
%return [ + i32 1, label %sw.bb68 + i32 2, label %sw.bb75 + ] + +sw.bb68: ; preds = %sw.bb66 + %3 = trunc i64 %and22 to i32 + %conv69 = and i32 %3, 255 + switch i32 %conv69, label %sw.default74 [ + i32 0, label %return + i32 1, label %return + i32 126, label %return + i32 127, label %return + ] + +sw.default74: ; preds = %sw.bb68 + br label %return + +sw.bb75: ; preds = %sw.bb66 + %conv76 = and i64 %and22, 255 + %cond10 = icmp eq i64 %conv76, 3 + br i1 %cond10, label %sw.bb77, label %return + +sw.bb77: ; preds = %sw.bb75 + %conv78 = and i32 %conv28, 255 + switch i32 %conv78, label %return [ + i32 1, label %sw.bb79 + i32 2, label %sw.bb84 + i32 3, label %sw.bb92 + i32 4, label %sw.bb96 + ] + +sw.bb79: ; preds = %sw.bb77 + %conv80 = and i32 %conv32, 255 + switch i32 %conv80, label %sw.default83 [ + i32 1, label %return + i32 16, label %sw.bb82 + ] + +sw.bb82: ; preds = %sw.bb79 + br label %return + +sw.default83: ; preds = %sw.bb79 + br label %return + +sw.bb84: ; preds = %sw.bb77 + %conv85 = and i32 %conv32, 255 + switch i32 %conv85, label %sw.default91 [ + i32 1, label %return + i32 4, label %sw.bb87 + i32 5, label %sw.bb88 + i32 6, label %sw.bb89 + i32 28, label %sw.bb90 + ] + +sw.bb87: ; preds = %sw.bb84 + br label %return + +sw.bb88: ; preds = %sw.bb84 + br label %return + +sw.bb89: ; preds = %sw.bb84 + br label %return + +sw.bb90: ; preds = %sw.bb84 + br label %return + +sw.default91: ; preds = %sw.bb84 + br label %return + +sw.bb92: ; preds = %sw.bb77 + %conv93 = and i64 %and30, 255 + %cond11 = icmp eq i64 %conv93, 1 + %.36 = select i1 %cond11, i8* getelementptr inbounds ([14 x i8]* @.str778205, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0) + br label %return + +sw.bb96: ; preds = %sw.bb77 + %conv97 = and i32 %conv32, 255 + switch i32 %conv97, label %sw.default106 [ + i32 1, label %return + i32 2, label %sw.bb99 + i32 3, label %sw.bb100 + i32 4, label %sw.bb101 + i32 5, label %sw.bb102 + i32 6, label %sw.bb103 + i32 7, label 
%sw.bb104 + i32 8, label %sw.bb105 + ] + +sw.bb99: ; preds = %sw.bb96 + br label %return + +sw.bb100: ; preds = %sw.bb96 + br label %return + +sw.bb101: ; preds = %sw.bb96 + br label %return + +sw.bb102: ; preds = %sw.bb96 + br label %return + +sw.bb103: ; preds = %sw.bb96 + br label %return + +sw.bb104: ; preds = %sw.bb96 + br label %return + +sw.bb105: ; preds = %sw.bb96 + br label %return + +sw.default106: ; preds = %sw.bb96 + br label %return + +sw.bb112: ; preds = %if.end + %4 = trunc i64 %and10 to i32 + %conv113 = and i32 %4, 255 + switch i32 %conv113, label %return [ + i32 4, label %sw.bb114 + i32 6, label %sw.bb127 + ] + +sw.bb114: ; preds = %sw.bb112 + %conv115 = and i64 %and14, 255 + %cond5 = icmp eq i64 %conv115, 2 + %conv117 = and i64 %and18, 255 + %cond6 = icmp eq i64 %conv117, 1 + %or.cond = and i1 %cond5, %cond6 + %conv119 = and i64 %and22, 255 + %cond7 = icmp eq i64 %conv119, 2 + %or.cond39 = and i1 %or.cond, %cond7 + br i1 %or.cond39, label %sw.bb120, label %return + +sw.bb120: ; preds = %sw.bb114 + %conv121 = and i64 %and26, 255 + %cond8 = icmp eq i64 %conv121, 4 + %.37 = select i1 %cond8, i8* getelementptr inbounds ([5 x i8]* @.str514367, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0) + br label %return + +sw.bb127: ; preds = %sw.bb112 + %conv128 = and i64 %and14, 255 + %cond = icmp eq i64 %conv128, 4 + %conv130 = and i64 %and18, 255 + %cond1 = icmp eq i64 %conv130, 1 + %or.cond40 = and i1 %cond, %cond1 + %conv132 = and i64 %and22, 255 + %cond2 = icmp eq i64 %conv132, 2 + %or.cond41 = and i1 %or.cond40, %cond2 + %conv134 = and i64 %and26, 255 + %cond3 = icmp eq i64 %conv134, 4 + %or.cond42 = and i1 %or.cond41, %cond3 + br i1 %or.cond42, label %sw.bb135, label %return + +sw.bb135: ; preds = %sw.bb127 + %conv136 = and i64 %and30, 255 + %cond4 = icmp eq i64 %conv136, 2 + %.38 = select i1 %cond4, i8* getelementptr inbounds ([12 x i8]* @.str868214, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @.str2104, i32 0, 
i32 0) + br label %return + +return: ; preds = %sw.bb135, %sw.bb127, %sw.bb120, %sw.bb114, %sw.bb112, %sw.default106, %sw.bb105, %sw.bb104, %sw.bb103, %sw.bb102, %sw.bb101, %sw.bb100, %sw.bb99, %sw.bb96, %sw.bb92, %sw.default91, %sw.bb90, %sw.bb89, %sw.bb88, %sw.bb87, %sw.bb84, %sw.default83, %sw.bb82, %sw.bb79, %sw.bb77, %sw.bb75, %sw.default74, %sw.bb68, %sw.bb68, %sw.bb68, %sw.bb68, %sw.bb66, %sw.bb64, %sw.default61, %sw.bb60, %sw.bb56, %sw.default54, %sw.bb53, %sw.bb52, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb44, %sw.bb42, %sw.bb39, %sw.bb37, %sw.bb35, %sw.bb, %if.end, %lor.lhs.false, %entry + %retval.0 = phi i8* [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default106 ], [ getelementptr inbounds ([44 x i8]* @.str858213, i32 0, i32 0), %sw.bb105 ], [ getelementptr inbounds ([29 x i8]* @.str848212, i32 0, i32 0), %sw.bb104 ], [ getelementptr inbounds ([31 x i8]* @.str838211, i32 0, i32 0), %sw.bb103 ], [ getelementptr inbounds ([34 x i8]* @.str828210, i32 0, i32 0), %sw.bb102 ], [ getelementptr inbounds ([20 x i8]* @.str818209, i32 0, i32 0), %sw.bb101 ], [ getelementptr inbounds ([24 x i8]* @.str808208, i32 0, i32 0), %sw.bb100 ], [ getelementptr inbounds ([25 x i8]* @.str798207, i32 0, i32 0), %sw.bb99 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default91 ], [ getelementptr inbounds ([8 x i8]* @.str768204, i32 0, i32 0), %sw.bb90 ], [ getelementptr inbounds ([21 x i8]* @.str758203, i32 0, i32 0), %sw.bb89 ], [ getelementptr inbounds ([26 x i8]* @.str748202, i32 0, i32 0), %sw.bb88 ], [ getelementptr inbounds ([21 x i8]* @.str738201, i32 0, i32 0), %sw.bb87 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default83 ], [ getelementptr inbounds ([9 x i8]* @.str718199, i32 0, i32 0), %sw.bb82 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default74 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default61 ], [ getelementptr inbounds ([5 x 
i8]* @.str514367, i32 0, i32 0), %sw.bb60 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default54 ], [ getelementptr inbounds ([4 x i8]* @.str2417235, i32 0, i32 0), %sw.bb53 ], [ getelementptr inbounds ([14 x i8]* @.str2317234, i32 0, i32 0), %sw.bb52 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %lor.lhs.false ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %entry ], [ %., %sw.bb39 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([11 x i8]* @.str2017231, i32 0, i32 0), %sw.bb44 ], [ getelementptr inbounds ([3 x i8]* @.str20216493, i32 0, i32 0), %sw.bb42 ], [ %.35, %sw.bb56 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb37 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb35 ], [ getelementptr inbounds ([4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([4 x i8]* @.str3217243, i32 0, i32 0), %sw.bb68 ], [ getelementptr inbounds ([6 x i8]* @.str398167, i32 0, i32 0), %sw.bb79 ], [ getelementptr inbounds ([5 x i8]* @.str2717238, i32 0, i32 0), %sw.bb84 ], [ %.36, %sw.bb92 ], [ getelementptr inbounds ([22 x i8]* @.str788206, i32 0, i32 0), %sw.bb96 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb77 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb75 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb66 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, 
i32 0), %sw.bb64 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb ], [ %.37, %sw.bb120 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb114 ], [ %.38, %sw.bb135 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb127 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.bb112 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %if.end ] + ret i8* %retval.0 +} + +; Function Attrs: nounwind readonly +define hidden i8* @_ZN12MediaInfoLib27Mxf_Sequence_DataDefinitionEN6ZenLib7uint128E(%"class.ZenLib::uint128"* nocapture readonly %DataDefinition) #0 { +entry: + %lo = getelementptr inbounds %"class.ZenLib::uint128"* %DataDefinition, i32 0, i32 0 + %0 = load i64* %lo, align 1, !tbaa !7 + %and = lshr i64 %0, 32 + %conv = trunc i64 %and to i32 + %and2 = lshr i64 %0, 24 + %conv5 = and i32 %conv, 255 + switch i32 %conv5, label %return [ + i32 1, label %sw.bb + i32 2, label %sw.bb9 + ] + +sw.bb: ; preds = %entry + %conv4 = trunc i64 %and2 to i32 + %conv6 = and i32 %conv4, 255 + switch i32 %conv6, label %sw.default [ + i32 1, label %return + i32 2, label %return + i32 3, label %return + i32 16, label %sw.bb8 + ] + +sw.bb8: ; preds = %sw.bb + br label %return + +sw.default: ; preds = %sw.bb + br label %return + +sw.bb9: ; preds = %entry + %1 = trunc i64 %and2 to i32 + %conv10 = and i32 %1, 255 + switch i32 %conv10, label %sw.default14 [ + i32 1, label %return + i32 2, label %sw.bb12 + i32 3, label %sw.bb13 + ] + +sw.bb12: ; preds = %sw.bb9 + br label %return + +sw.bb13: ; preds = %sw.bb9 + br label %return + +sw.default14: ; preds = %sw.bb9 + br label %return + +return: ; preds = %sw.default14, %sw.bb13, %sw.bb12, %sw.bb9, %sw.default, %sw.bb8, %sw.bb, %sw.bb, %sw.bb, %entry + %retval.0 = phi i8* [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default14 ], [ getelementptr inbounds ([5 x i8]* @.str1717689, i32 0, i32 0), %sw.bb13 ], [ getelementptr inbounds ([6 x i8]* 
@.str928220, i32 0, i32 0), %sw.bb12 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %sw.default ], [ getelementptr inbounds ([21 x i8]* @.str908218, i32 0, i32 0), %sw.bb8 ], [ getelementptr inbounds ([5 x i8]* @.str9210864, i32 0, i32 0), %sw.bb ], [ getelementptr inbounds ([5 x i8]* @.str9210864, i32 0, i32 0), %sw.bb ], [ getelementptr inbounds ([5 x i8]* @.str9210864, i32 0, i32 0), %sw.bb ], [ getelementptr inbounds ([8 x i8]* @.str918219, i32 0, i32 0), %sw.bb9 ], [ getelementptr inbounds ([1 x i8]* @.str2104, i32 0, i32 0), %entry ] + ret i8* %retval.0 +} + +define i32 @main() { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0)) + ret i32 0 +} + +declare i32 @printf(i8*, ...) + +attributes #0 = { nounwind readonly } + +!llvm.ident = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, 
!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0} +!llvm.module.flags = !{!1} + +!0 = metadata !{metadata !"clang version 3.4 (git@github.com:kripken/emscripten-fastcomp-clang.git 406c991ba0416c838ee097361c27a12411a088b9) (https://chromium.googlesource.com/native_client/pnacl-llvm a5e8942da586a7ef0ed02361b77a3010f16428cf)"} +!1 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!2 = metadata !{metadata !3, metadata !4, i64 8} +!3 = metadata !{metadata !"_ZTSN6ZenLib7uint128E", metadata !4, i64 0, metadata !4, i64 8} +!4 = metadata !{metadata !"long long", metadata !5, i64 0} +!5 = metadata !{metadata !"omnipotent char", metadata !6, i64 0} +!6 = metadata !{metadata !"Simple C/C++ 
TBAA"} +!7 = metadata !{metadata !3, metadata !4, i64 0} + diff --git a/test/CodeGen/JS/expand-insertextract.ll b/test/CodeGen/JS/expand-insertextract.ll new file mode 100644 index 00000000000..7a247380f32 --- /dev/null +++ b/test/CodeGen/JS/expand-insertextract.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: sp = STACKTOP; +; CHECK: STACKTOP = STACKTOP + 16|0; +; CHECK: $0 = sp; +; CHECK: SIMD_float32x4_store(HEAPU8, $0, $p); +; CHECK: $1 = (($0) + ($i<<2)|0); +; CHECK: $2 = +HEAPF32[$1>>2]; +; CHECK: STACKTOP = sp;return (+$2); +; CHECK: } +define float @ext(<4 x float> %p, i32 %i) { + %f = extractelement <4 x float> %p, i32 %i + ret float %f +} + +; CHECK: sp = STACKTOP; +; CHECK: STACKTOP = STACKTOP + 16|0; +; CHECK: $0 = sp; +; CHECK: SIMD_float32x4_store(HEAPU8, $0, $p); +; CHECK: $1 = (($0) + ($i<<2)|0); +; CHECK: HEAPF32[$1>>2] = $f; +; CHECK: $2 = SIMD_float32x4_load(HEAPU8, $0); +; CHECK: STACKTOP = sp;return (SIMD_float32x4($2)); +; CHECK: } +define <4 x float> @ins(<4 x float> %p, float %f, i32 %i) { + %v = insertelement <4 x float> %p, float %f, i32 %i + ret <4 x float> %v +} diff --git a/test/CodeGen/JS/simd-loadstore.ll b/test/CodeGen/JS/simd-loadstore.ll new file mode 100644 index 00000000000..60ed1679512 --- /dev/null +++ b/test/CodeGen/JS/simd-loadstore.ll @@ -0,0 +1,68 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: function _fx1($p) { +; CHECK: $p = $p|0; +; CHECK: var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0) +; CHECK: $t = SIMD_float32x4_loadX(HEAPU8, $p); +; CHECK: $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+0.5),Math_fround(+0),Math_fround(+0),Math_fround(+0))); 
+; CHECK: $q = $p;SIMD_float32x4_storeX(HEAPU8, $q, $s); +; CHECK: return; +; CHECK: } +define void @fx1(i8* %p) { + %q = bitcast i8* %p to <1 x float>* + %t = load <1 x float>* %q + %s = fadd <1 x float> %t, + store <1 x float> %s, <1 x float>* %q + ret void +} + +; CHECK: function _fx2($p) { +; CHECK: $p = $p|0; +; CHECK: var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0) +; CHECK: $t = SIMD_float32x4_loadXY(HEAPU8, $p); +; CHECK: $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+3.5),Math_fround(+7.5),Math_fround(+0),Math_fround(+0))); +; CHECK: $q = $p;SIMD_float32x4_storeXY(HEAPU8, $q, $s); +; CHECK: return; +; CHECK: } +define void @fx2(i8* %p) { + %q = bitcast i8* %p to <2 x float>* + %t = load <2 x float>* %q + %s = fadd <2 x float> %t, + store <2 x float> %s, <2 x float>* %q + ret void +} + +; CHECK: function _fx3($p) { +; CHECK: $p = $p|0; +; CHECK: var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0) +; CHECK: $t = SIMD_float32x4_loadXYZ(HEAPU8, $p); +; CHECK: $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+1.5),Math_fround(+4.5),Math_fround(+6.5),Math_fround(+0))); +; CHECK: $q = $p;SIMD_float32x4_storeXYZ(HEAPU8, $q, $s); +; CHECK: return; +; CHECK: } +define void @fx3(i8* %p) { + %q = bitcast i8* %p to <3 x float>* + %t = load <3 x float>* %q + %s = fadd <3 x float> %t, + store <3 x float> %s, <3 x float>* %q + ret void +} + +; CHECK: function _fx4($p) { +; CHECK: $p = $p|0; +; CHECK: var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0) +; CHECK: $t = SIMD_float32x4_load(HEAPU8, $p); +; CHECK: $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+9.5),Math_fround(+5.5),Math_fround(+1.5),Math_fround(+-3.5))); +; CHECK: $q = $p;SIMD_float32x4_store(HEAPU8, $q, $s); +; CHECK: return; +; CHECK: } +define void @fx4(i8* %p) { + %q = bitcast i8* %p to <4 x float>* + %t = load <4 x float>* %q + %s = fadd <4 x float> %t, + store <4 x float> %s, <4 x float>* %q + ret void +} diff --git 
a/test/CodeGen/JS/simd-select.ll b/test/CodeGen/JS/simd-select.ll new file mode 100644 index 00000000000..7547b199049 --- /dev/null +++ b/test/CodeGen/JS/simd-select.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: function _test0($a,$b,$cond) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: $cond = SIMD_int32x4($cond); +; CHECK: $cmp = SIMD_int32x4_select($cond,$a,$b); +; CHECK: return (SIMD_int32x4($cmp)); +; CHECK: } +define <4 x i32> @test0(<4 x i32> %a, <4 x i32> %b, <4 x i1> %cond) nounwind { +entry: + %cmp = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %cmp +} + +; CHECK: function _test1($a,$b,$cond) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: $cond = SIMD_int32x4($cond); +; CHECK: $cmp = SIMD_float32x4_select($cond,$a,$b); +; CHECK: return (SIMD_float32x4($cmp)); +; CHECK: } +define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %cond) nounwind { +entry: + %cmp = select <4 x i1> %cond, <4 x float> %a, <4 x float> %b + ret <4 x float> %cmp +} + +; CHECK: function _test2($a,$b,$cond) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: $cond = $cond|0; +; CHECK: $cmp = $cond ? $a : $b; +; CHECK: return (SIMD_int32x4($cmp)); +; CHECK: } +define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b, i1 %cond) nounwind { +entry: + %cmp = select i1 %cond, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %cmp +} + +; CHECK: function _test3($a,$b,$cond) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: $cond = $cond|0; +; CHECK: $cmp = $cond ? 
$a : $b; +; CHECK: return (SIMD_float32x4($cmp)); +; CHECK: } +define <4 x float> @test3(<4 x float> %a, <4 x float> %b, i1 %cond) nounwind { +entry: + %cmp = select i1 %cond, <4 x float> %a, <4 x float> %b + ret <4 x float> %cmp +} diff --git a/test/CodeGen/JS/simd-shift.ll b/test/CodeGen/JS/simd-shift.ll new file mode 100644 index 00000000000..09819876fd8 --- /dev/null +++ b/test/CodeGen/JS/simd-shift.ll @@ -0,0 +1,142 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: function _test0($a) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $shl = SIMD_int32x4_shiftLeftByScalar($a, 3); +; CHECK: return (SIMD_int32x4($shl)); +; CHECK: } +define <4 x i32> @test0(<4 x i32> %a) { +entry: + %shl = shl <4 x i32> %a, + ret <4 x i32> %shl +} + +; CHECK: function _test1($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = $b|0; +; CHECK: SIMD_int32x4_shiftLeftByScalar($a, $b); +; CHECK: return (SIMD_int32x4($shl)); +; CHECK: } +define <4 x i32> @test1(<4 x i32> %a, i32 %b) { +entry: + %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3 + %shl = shl <4 x i32> %a, %vecinit3 + ret <4 x i32> %shl +} + +; CHECK: function _test2($a,$b,$c) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = $b|0; +; CHECK: $c = $c|0; +; CHECK: var $shl = SIMD_int32x4(0,0,0,0), $vecinit3 = SIMD_int32x4(0,0,0,0), label = 0, sp = 0; +; CHECK: $vecinit3 = SIMD_int32x4($b, $b, $c, $b); +; CHECK: $shl = SIMD_int32x4(($a.x|0) << ($vecinit3.x|0)|0, ($a.y|0) << ($vecinit3.y|0)|0, ($a.z|0) << ($vecinit3.z|0)|0, ($a.w|0) << ($vecinit3.w|0)|0); +; CHECK: return (SIMD_int32x4($shl)); +; CHECK: } +define <4 x i32> @test2(<4 x i32> %a, i32 %b, i32 %c) { +entry: + 
%vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3 + %shl = shl <4 x i32> %a, %vecinit3 + ret <4 x i32> %shl +} + +; CHECK: function _test3($a) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: SIMD_int32x4_shiftRightArithmeticByScalar($a, 3); +; CHECK: return (SIMD_int32x4($shr)); +; CHECK: } +define <4 x i32> @test3(<4 x i32> %a) { +entry: + %shr = ashr <4 x i32> %a, + ret <4 x i32> %shr +} + +; CHECK: function _test4($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = $b|0; +; CHECK: SIMD_int32x4_shiftRightArithmeticByScalar($a, $b); +; CHECK: return (SIMD_int32x4($shr)); +; CHECK: } +define <4 x i32> @test4(<4 x i32> %a, i32 %b) { +entry: + %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3 + %shr = ashr <4 x i32> %a, %vecinit3 + ret <4 x i32> %shr +} + +; CHECK: function _test5($a,$b,$c) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = $b|0; +; CHECK: $c = $c|0; +; CHECK: var $shr = SIMD_int32x4(0,0,0,0), $vecinit3 = SIMD_int32x4(0,0,0,0), label = 0, sp = 0; +; CHECK: $vecinit3 = SIMD_int32x4($b, $c, $b, $b); +; CHECK: $shr = SIMD_int32x4(($a.x|0) >> ($vecinit3.x|0)|0, ($a.y|0) >> ($vecinit3.y|0)|0, ($a.z|0) >> ($vecinit3.z|0)|0, ($a.w|0) >> ($vecinit3.w|0)|0); +; CHECK: return (SIMD_int32x4($shr)); +; CHECK: } +define <4 x i32> @test5(<4 x i32> %a, i32 %b, i32 %c) { +entry: + %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %c, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3 + %shr = ashr <4 x i32> %a, %vecinit3 + ret <4 x i32> %shr +} + 
+; CHECK: function _test6($a) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: SIMD_int32x4_shiftRightLogicalByScalar($a, 3); +; CHECK: return (SIMD_int32x4($lshr)); +; CHECK: } +define <4 x i32> @test6(<4 x i32> %a) { +entry: + %lshr = lshr <4 x i32> %a, + ret <4 x i32> %lshr +} + +; CHECK: function _test7($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = $b|0; +; CHECK: $lshr = SIMD_int32x4_shiftRightLogicalByScalar($a, $b); +; CHECK: return (SIMD_int32x4($lshr)); +; CHECK: } +define <4 x i32> @test7(<4 x i32> %a, i32 %b) { +entry: + %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %b, i32 3 + %lshr = lshr <4 x i32> %a, %vecinit3 + ret <4 x i32> %lshr +} + +; CHECK: function _test8($a,$b,$c) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = $b|0; +; CHECK: $c = $c|0; +; CHECK: var $lshr = SIMD_int32x4(0,0,0,0), $vecinit3 = SIMD_int32x4(0,0,0,0), label = 0, sp = 0; +; CHECK: $vecinit3 = SIMD_int32x4($b, $b, $b, $c); +; CHECK: $lshr = SIMD_int32x4(($a.x|0) >>> ($vecinit3.x|0)|0, ($a.y|0) >>> ($vecinit3.y|0)|0, ($a.z|0) >>> ($vecinit3.z|0)|0, ($a.w|0) >>> ($vecinit3.w|0)|0); +; CHECK: return (SIMD_int32x4($lshr)); +; CHECK: } +define <4 x i32> @test8(<4 x i32> %a, i32 %b, i32 %c) { +entry: + %vecinit = insertelement <4 x i32> undef, i32 %b, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %c, i32 3 + %lshr = lshr <4 x i32> %a, %vecinit3 + ret <4 x i32> %lshr +} diff --git a/test/CodeGen/JS/simd-shuffle.ll b/test/CodeGen/JS/simd-shuffle.ll new file mode 100644 index 00000000000..3e7667c56f4 --- /dev/null +++ b/test/CodeGen/JS/simd-shuffle.ll @@ -0,0 +1,524 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = 
"e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128" +target triple = "asmjs-unknown-emscripten" + +; CHECK: function _splat_int32x4($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <4 x i32> @splat_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _swizzle_int32x4($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_swizzle($a, 0, 3, 1, 2); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <4 x i32> @swizzle_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _swizzlehi_int32x4($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_swizzle($b, 2, 1, 3, 0); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <4 x i32> @swizzlehi_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _shuffleXY_float32x4to3($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_shuffle($a, $b, 7, 0, 0); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <3 x float> @shuffleXY_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _shuffle_int32x4($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = 
SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_shuffle($a, $b, 7, 0, 5, 3); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <4 x i32> @shuffle_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _shuffleXY_int32x4($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_shuffle($a, $b, 7, 0, 0, 0); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <4 x i32> @shuffleXY_int32x4(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _splat_int32x3($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <3 x i32> @splat_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _swizzle_int32x3($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_swizzle($a, 0, 2, 1, 0); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <3 x i32> @swizzle_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _swizzlehi_int32x3($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_swizzle($b, 0, 2, 1, 0); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <3 x i32> @swizzlehi_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector 
<3 x i32> %a, <3 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _shuffle_int32x3($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_shuffle($a, $b, 6, 0, 5); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <3 x i32> @shuffle_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _shuffleXY_int32x3($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_shuffle($a, $b, 6, 0, 0); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <3 x i32> @shuffleXY_int32x3(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _splat_int32x3to4($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <4 x i32> @splat_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _swizzle_int32x3to4($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_swizzle($a, 0, 2, 1, 2); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <4 x i32> @swizzle_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _swizzlehi_int32x3to4($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_swizzle($b, 2, 
1, 0, 2); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <4 x i32> @swizzlehi_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _shuffle_int32x3to4($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_shuffle($a, $b, 6, 0, 5, 2); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <4 x i32> @shuffle_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _shuffleXY_int32x3to4($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_shuffle($a, $b, 6, 0, 0, 0); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <4 x i32> @shuffleXY_int32x3to4(<3 x i32> %a, <3 x i32> %b) nounwind { +entry: + %sel = shufflevector <3 x i32> %a, <3 x i32> %b, <4 x i32> + ret <4 x i32> %sel +} + +; CHECK: function _splat_int32x4to3($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <3 x i32> @splat_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _swizzle_int32x4to3($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_swizzle($a, 0, 3, 1, 0); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <3 x i32> @swizzle_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: 
function _swizzlehi_int32x4to3($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_swizzle($b, 2, 1, 3, 0); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <3 x i32> @swizzlehi_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _shuffle_int32x4to3($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_shuffle($a, $b, 7, 0, 5); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <3 x i32> @shuffle_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _shuffleXY_int32x4to3($a,$b) { +; CHECK: $a = SIMD_int32x4($a); +; CHECK: $b = SIMD_int32x4($b); +; CHECK: var $sel = SIMD_int32x4(0,0,0,0) +; CHECK: $sel = SIMD_int32x4_shuffle($a, $b, 7, 0, 0); +; CHECK: return (SIMD_int32x4($sel)); +; CHECK: } +define <3 x i32> @shuffleXY_int32x4to3(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %sel = shufflevector <4 x i32> %a, <4 x i32> %b, <3 x i32> + ret <3 x i32> %sel +} + +; CHECK: function _splat_float32x4($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <4 x float> @splat_float32x4(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _swizzle_float32x4($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_swizzle($a, 0, 3, 1, 2); +; CHECK: return 
(SIMD_float32x4($sel)); +; CHECK: } +define <4 x float> @swizzle_float32x4(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _swizzlehi_float32x4($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_swizzle($b, 2, 1, 3, 0); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <4 x float> @swizzlehi_float32x4(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _shuffle_float32x4($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_shuffle($a, $b, 7, 0, 5, 3); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <4 x float> @shuffle_float32x4(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _shuffleXY_float32x4($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_shuffle($a, $b, 7, 0, 0, 0); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <4 x float> @shuffleXY_float32x4(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _splat_float32x3($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <3 x float> @splat_float32x3(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 
x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _swizzle_float32x3($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_swizzle($a, 0, 2, 1, 0); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <3 x float> @swizzle_float32x3(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _swizzlehi_float32x3($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_swizzle($b, 0, 2, 1, 0); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <3 x float> @swizzlehi_float32x3(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _shuffle_float32x3($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_shuffle($a, $b, 6, 0, 5); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <3 x float> @shuffle_float32x3(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _shuffleXY_float32x3($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_shuffle($a, $b, 6, 0, 0); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <3 x float> @shuffleXY_float32x3(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _splat_float32x3to4($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); 
+; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <4 x float> @splat_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _swizzle_float32x3to4($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_swizzle($a, 0, 2, 1, 2); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <4 x float> @swizzle_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _swizzlehi_float32x3to4($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_swizzle($b, 2, 1, 0, 2); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <4 x float> @swizzlehi_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _shuffle_float32x3to4($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_shuffle($a, $b, 6, 0, 5, 2); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <4 x float> @shuffle_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _shuffleXY_float32x3to4($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_shuffle($a, $b, 6, 0, 0, 0); +; CHECK: return (SIMD_float32x4($sel)); 
+; CHECK: } +define <4 x float> @shuffleXY_float32x3to4(<3 x float> %a, <3 x float> %b) nounwind { +entry: + %sel = shufflevector <3 x float> %a, <3 x float> %b, <4 x i32> + ret <4 x float> %sel +} + +; CHECK: function _splat_float32x4to3($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_swizzle($a, 0, 0, 0, 0); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <3 x float> @splat_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _swizzle_float32x4to3($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_swizzle($a, 0, 3, 1, 0); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <3 x float> @swizzle_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _swizzlehi_float32x4to3($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_swizzle($b, 2, 1, 3, 0); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <3 x float> @swizzlehi_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x float> %b, <3 x i32> + ret <3 x float> %sel +} + +; CHECK: function _shuffle_float32x4to3($a,$b) { +; CHECK: $a = SIMD_float32x4($a); +; CHECK: $b = SIMD_float32x4($b); +; CHECK: var $sel = SIMD_float32x4(0,0,0,0) +; CHECK: $sel = SIMD_float32x4_shuffle($a, $b, 7, 0, 5); +; CHECK: return (SIMD_float32x4($sel)); +; CHECK: } +define <3 x float> @shuffle_float32x4to3(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %sel = shufflevector <4 x float> %a, <4 x 
float> %b, <3 x i32> + ret <3 x float> %sel +}