From 298f306ec4b67ec645d58e8a3fa49f8ee3449ddb Mon Sep 17 00:00:00 2001
From: Alon Zakai <alonzakai@gmail.com>
Date: Tue, 25 Nov 2014 11:10:50 -0800
Subject: [PATCH 1/9] handle non-eq icmps of more than 64 bits

---
 lib/Target/JSBackend/ExpandI64.cpp | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/lib/Target/JSBackend/ExpandI64.cpp b/lib/Target/JSBackend/ExpandI64.cpp
index c07324e79d1..168e66063b3 100644
--- a/lib/Target/JSBackend/ExpandI64.cpp
+++ b/lib/Target/JSBackend/ExpandI64.cpp
@@ -655,8 +655,10 @@ bool ExpandI64::splitInst(Instruction *I) {
               return true;
             }
           }
-          assert(I->getOperand(0)->getType() == i64);
-          Instruction *A, *B, *C, *D, *Final;
+          Type *T = I->getOperand(0)->getType();
+          assert(T->isIntegerTy() && T->getIntegerBitWidth() % 32 == 0);
+          int NumChunks = getNumChunks(T);
+          assert(NumChunks >= 2);
           ICmpInst::Predicate StrictPred = Pred;
           ICmpInst::Predicate UnsignedPred = Pred;
           switch (Pred) {
@@ -670,11 +672,17 @@ bool ExpandI64::splitInst(Instruction *I) {
             case ICmpInst::ICMP_UGT: break;
             default: assert(0);
           }
-          A = CopyDebug(new ICmpInst(I, StrictPred, LeftChunks[1], RightChunks[1]), I);
-          B = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_EQ, LeftChunks[1], RightChunks[1]), I);
-          C = CopyDebug(new ICmpInst(I, UnsignedPred, LeftChunks[0], RightChunks[0]), I);
-          D = CopyDebug(BinaryOperator::Create(Instruction::And, B, C, "", I), I);
-          Final = CopyDebug(BinaryOperator::Create(Instruction::Or, A, D, "", I), I);
+          // general pattern is
+          // a,b,c < A,B,C    =>    c < C || (c == C && b < B) || (c == C && b == B && a < A)
+          Instruction *Final = CopyDebug(new ICmpInst(I, StrictPred, LeftChunks[NumChunks-1], RightChunks[NumChunks-1]), I);
+          for (int i = NumChunks-2; i >= 0; i--) {
+            Instruction *Curr = CopyDebug(new ICmpInst(I, UnsignedPred, LeftChunks[i], RightChunks[i]), I);
+            for (int j = NumChunks-1; j > i; j--) {
+              Instruction *Temp = CopyDebug(new ICmpInst(I, ICmpInst::ICMP_EQ, LeftChunks[j], RightChunks[j]), I);
+              Curr = CopyDebug(BinaryOperator::Create(Instruction::And, Temp, Curr, "", I), I);
+            }
+            Final = CopyDebug(BinaryOperator::Create(Instruction::Or, Final, Curr, "", I), I);
+          }
           I->replaceAllUsesWith(Final);
           break;
         }

From 0805948927ccfd5a678b0ebbfd9e3c21fbcb0675 Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Sat, 29 Nov 2014 06:33:05 -0800
Subject: [PATCH 2/9] Use HEAPU8 for SIMD load and store

Change from HEAP8 to HEAPU8, as that is what Odin's validator is
currently requiring.
---
 lib/Target/JSBackend/JSBackend.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index c18bc7468a7..1dfbfc1a8ba 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -1550,9 +1550,9 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
         std::string PS = getValueAsStr(P);
         Code << getAssignIfNeeded(I);
         if (VT->getElementType()->isIntegerTy()) {
-          Code << "SIMD_int32x4_load(HEAP8, " << PS << ")";
+          Code << "SIMD_int32x4_load(HEAPU8, " << PS << ")";
         } else {
-          Code << "SIMD_float32x4_load(HEAP8, " << PS << ")";
+          Code << "SIMD_float32x4_load(HEAPU8, " << PS << ")";
         }
         break;
       }
@@ -1584,9 +1584,9 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
       std::string VS = getValueAsStr(SI->getValueOperand());
       Code << getAdHocAssign(PS, P->getType()) << getValueAsStr(P) << ';';
       if (VT->getElementType()->isIntegerTy()) {
-        Code << "SIMD_int32x4_store(HEAP8, " << PS << ", " << VS << ")";
+        Code << "SIMD_int32x4_store(HEAPU8, " << PS << ", " << VS << ")";
       } else {
-        Code << "SIMD_float32x4_store(HEAP8, " << PS << ", " << VS << ")";
+        Code << "SIMD_float32x4_store(HEAPU8, " << PS << ", " << VS << ")";
       }
       return true;
     } else if (Operator::getOpcode(I) == Instruction::ExtractElement) {

From a0df0e0276ad920b44746e87e44de5aa0e9895ee Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Sat, 29 Nov 2014 06:54:03 -0800
Subject: [PATCH 3/9] Enable asm.js validation for SIMD

Not everything is working yet, but we're now at the point where it's
more useful to enable validation than to disable it by default.
---
 lib/Target/JSBackend/JSBackend.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index 1dfbfc1a8ba..071ad05144f 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -388,7 +388,6 @@ namespace {
              VT->getElementType()->getPrimitiveSizeInBits() == 1);
       assert(VT->getNumElements() == 4);
       UsesSIMD = true;
-      CantValidate = "SIMD types in use";
     }
 
     std::string ensureCast(std::string S, Type *T, AsmCast sign) {

From 28ad0339cbb0cef1d3975566b793d93521d2aad9 Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Sat, 29 Nov 2014 07:34:26 -0800
Subject: [PATCH 4/9] Start a JS backend cost model

Implement the TargetTransformInfo interface with JSTargetTransformInfo,
which gives costs used by the optimizer. The implementation is currently
fairly minimal.
---
 lib/Target/JSBackend/CMakeLists.txt           |   1 +
 lib/Target/JSBackend/JS.h                     |  29 +++++
 lib/Target/JSBackend/JSTargetMachine.cpp      |  20 ++++
 lib/Target/JSBackend/JSTargetMachine.h        |   4 +
 .../JSBackend/JSTargetTransformInfo.cpp       | 103 ++++++++++++++++++
 5 files changed, 157 insertions(+)
 create mode 100644 lib/Target/JSBackend/JS.h
 create mode 100644 lib/Target/JSBackend/JSTargetTransformInfo.cpp

diff --git a/lib/Target/JSBackend/CMakeLists.txt b/lib/Target/JSBackend/CMakeLists.txt
index 24622e1c3f2..ac04b0a2a71 100644
--- a/lib/Target/JSBackend/CMakeLists.txt
+++ b/lib/Target/JSBackend/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_target(JSBackendCodeGen
   ExpandI64.cpp
   JSBackend.cpp
   JSTargetMachine.cpp
+  JSTargetTransformInfo.cpp
   Relooper.cpp
   SimplifyAllocas.cpp
   )
diff --git a/lib/Target/JSBackend/JS.h b/lib/Target/JSBackend/JS.h
new file mode 100644
index 00000000000..6fe22426b8e
--- /dev/null
+++ b/lib/Target/JSBackend/JS.h
@@ -0,0 +1,29 @@
+//===-- JS.h - Top-level interface for JS representation ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the JS
+// target library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_JS_H
+#define TARGET_JS_H
+
+namespace llvm {
+
+class ImmutablePass;
+class JSTargetMachine;
+
+/// \brief Creates a JS-specific Target Transformation Info pass for the
+/// given JS target machine.
+ImmutablePass *createJSTargetTransformInfoPass(const JSTargetMachine *TM);
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/JSBackend/JSTargetMachine.cpp b/lib/Target/JSBackend/JSTargetMachine.cpp
index af428f2eb2d..92a0e39a40f 100644
--- a/lib/Target/JSBackend/JSTargetMachine.cpp
+++ b/lib/Target/JSBackend/JSTargetMachine.cpp
@@ -1,4 +1,18 @@
+//===-- JSTargetMachine.cpp - Define TargetMachine for the JS -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the JS specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
 #include "JSTargetMachine.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/PassManager.h"
 using namespace llvm;
@@ -12,3 +26,9 @@ JSTargetMachine::JSTargetMachine(const Target &T, StringRef Triple,
        "f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128") {
   CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
 }
+
+void JSTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+  // We don't currently use BasicTTI because that depends on
+  // TargetLowering, which we don't currently implement.
+  PM.add(createJSTargetTransformInfoPass(this));
+}
diff --git a/lib/Target/JSBackend/JSTargetMachine.h b/lib/Target/JSBackend/JSTargetMachine.h
index 3912d3b5b29..df5ac445dbf 100644
--- a/lib/Target/JSBackend/JSTargetMachine.h
+++ b/lib/Target/JSBackend/JSTargetMachine.h
@@ -15,6 +15,7 @@
 #ifndef JSTARGETMACHINE_H
 #define JSTARGETMACHINE_H
 
+#include "JS.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
 
@@ -39,6 +40,9 @@ class JSTargetMachine : public TargetMachine {
                                    AnalysisID StopAfter);
 
   virtual const DataLayout *getDataLayout() const { return &DL; }
+
+  /// \brief Register JS analysis passes with a pass manager.
+  virtual void addAnalysisPasses(PassManagerBase &PM);
 };
 
 } // End llvm namespace
diff --git a/lib/Target/JSBackend/JSTargetTransformInfo.cpp b/lib/Target/JSBackend/JSTargetTransformInfo.cpp
new file mode 100644
index 00000000000..74c2201d54e
--- /dev/null
+++ b/lib/Target/JSBackend/JSTargetTransformInfo.cpp
@@ -0,0 +1,103 @@
+//===-- JSTargetTransformInfo.cpp - JS specific TTI pass ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// JS target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jstti"
+#include "JS.h"
+#include "JSTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeJSTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class JSTTI : public ImmutablePass, public TargetTransformInfo {
+public:
+  JSTTI() : ImmutablePass(ID) {
+    llvm_unreachable("This pass cannot be directly constructed");
+  }
+
+  JSTTI(const JSTargetMachine *TM)
+      : ImmutablePass(ID) {
+    initializeJSTTIPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual void initializePass() {
+    pushTTIStack(this);
+  }
+
+  virtual void finalizePass() {
+    popTTIStack();
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    TargetTransformInfo::getAnalysisUsage(AU);
+  }
+
+  /// Pass identification.
+  static char ID;
+
+  /// Provide necessary pointer adjustments for the two base classes.
+  virtual void *getAdjustedAnalysisPointer(const void *ID) {
+    if (ID == &TargetTransformInfo::ID)
+      return (TargetTransformInfo*)this;
+    return this;
+  }
+
+  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
+
+  virtual unsigned getRegisterBitWidth(bool Vector) const;
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(JSTTI, TargetTransformInfo, "jstti",
+                   "JS Target Transform Info", true, true, false)
+char JSTTI::ID = 0;
+
+ImmutablePass *
+llvm::createJSTargetTransformInfoPass(const JSTargetMachine *TM) {
+  return new JSTTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// JS cost model.
+//
+//===----------------------------------------------------------------------===//
+
+JSTTI::PopcntSupportKind JSTTI::getPopcntSupport(unsigned TyWidth) const {
+  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+  // Hopefully we'll get popcnt in ES7, but for now, we just have software.
+  return PSK_Software;
+}
+
+unsigned JSTTI::getRegisterBitWidth(bool Vector) const {
+  if (Vector) {
+    return 128;
+  }
+
+  return 32;
+}

From 0a328798bd208d573647d96d106cf79a09ef8d97 Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Sat, 29 Nov 2014 07:39:08 -0800
Subject: [PATCH 5/9] Optimize splat constants.

---
 lib/Target/JSBackend/JSBackend.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index 071ad05144f..8f924007a28 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -1108,6 +1108,15 @@ std::string JSWriter::getConstant(const Constant* CV, AsmCast sign) {
 }
 
 std::string JSWriter::getConstantVector(Type *ElementType, std::string x, std::string y, std::string z, std::string w) {
+  // Check for a splat.
+  if (x == y && x == z && x == w) {
+    if (ElementType->isIntegerTy()) {
+      return "SIMD_int32x4_splat(" + x + ')';
+    } else {
+      return "SIMD_float32x4_splat(Math_fround(" + x + "))";
+    }
+  }
+
   if (ElementType->isIntegerTy()) {
     return "SIMD_int32x4(" + x + ',' + y + ',' + z + ',' + w + ')';
   } else {

From c403bd804072fb2cd0d6a7fdcad52fb4116e7524 Mon Sep 17 00:00:00 2001
From: Alon Zakai <alonzakai@gmail.com>
Date: Fri, 5 Dec 2014 10:55:44 -0800
Subject: [PATCH 6/9] temporary workaround for pathological 40x slower linking
 issue

---
 lib/IR/Verifier.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index da6b573a0c3..31b5f3219c3 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -503,10 +503,10 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) {
     }
   }
 
-  if (!GV.hasInitializer()) {
+  //if (!GV.hasInitializer()) { // XXX EMSCRIPTEN - do not do extra verification below, 40x slower linking on some big projects
     visitGlobalValue(GV);
     return;
-  }
+  //}
 
   // Walk any aggregate initializers looking for bitcasts between address spaces
   SmallPtrSet<const Value *, 4> Visited;

From 6ff7cef360c874fc28efdf687593f0eb5d211284 Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Mon, 8 Dec 2014 15:10:58 -0800
Subject: [PATCH 7/9] Add a call handler for emscripten_float32x4_abs

---
 lib/Target/JSBackend/CallHandlers.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/Target/JSBackend/CallHandlers.h b/lib/Target/JSBackend/CallHandlers.h
index 4af4de7e9af..0f2cbabf20d 100644
--- a/lib/Target/JSBackend/CallHandlers.h
+++ b/lib/Target/JSBackend/CallHandlers.h
@@ -525,6 +525,7 @@ DEF_BUILTIN_HANDLER(emscripten_float32x4_greaterThanOrEqual, SIMD_float32x4_grea
 DEF_BUILTIN_HANDLER(emscripten_float32x4_select, SIMD_float32x4_select);
 DEF_BUILTIN_HANDLER(emscripten_float32x4_min, SIMD_float32x4_min);
 DEF_BUILTIN_HANDLER(emscripten_float32x4_max, SIMD.float32x4_max);
+DEF_BUILTIN_HANDLER(emscripten_float32x4_abs, SIMD_float32x4_abs);
 DEF_BUILTIN_HANDLER(emscripten_float32x4_sqrt, SIMD_float32x4_sqrt);
 DEF_BUILTIN_HANDLER(emscripten_float32x4_and, SIMD_float32x4_and);
 DEF_BUILTIN_HANDLER(emscripten_float32x4_or, SIMD_float32x4_or);
@@ -600,6 +601,7 @@ void setupCallHandlers() {
   SETUP_CALL_HANDLER(emscripten_float32x4_signmask);
   SETUP_CALL_HANDLER(emscripten_float32x4_min);
   SETUP_CALL_HANDLER(emscripten_float32x4_max);
+  SETUP_CALL_HANDLER(emscripten_float32x4_abs);
   SETUP_CALL_HANDLER(emscripten_float32x4_sqrt);
   SETUP_CALL_HANDLER(emscripten_float32x4_equal);
   SETUP_CALL_HANDLER(emscripten_float32x4_notEqual);

From 336936ed5436eb53fa23178bb9814d36a04b7ad4 Mon Sep 17 00:00:00 2001
From: Alon Zakai <alonzakai@gmail.com>
Date: Tue, 9 Dec 2014 10:56:07 -0800
Subject: [PATCH 8/9] update test/CodeGen/JS/expand-i64.ll

---
 test/CodeGen/JS/expand-i64.ll | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/CodeGen/JS/expand-i64.ll b/test/CodeGen/JS/expand-i64.ll
index d667548abf4..81d62707d83 100644
--- a/test/CodeGen/JS/expand-i64.ll
+++ b/test/CodeGen/JS/expand-i64.ll
@@ -174,9 +174,9 @@ define i32 @icmp_ne(i64 %a, i64 %b) {
 
 ; CHECK: define i32 @icmp_slt(i32, i32, i32, i32) {
 ; CHECK:   %5 = icmp slt i32 %1, %3
-; CHECK:   %6 = icmp eq i32 %1, %3
-; CHECK:   %7 = icmp ult i32 %0, %2
-; CHECK:   %8 = and i1 %6, %7
+; CHECK:   %6 = icmp ult i32 %0, %2
+; CHECK:   %7 = icmp eq i32 %1, %3
+; CHECK:   %8 = and i1 %7, %6
 ; CHECK:   %9 = or i1 %5, %8
 ; CHECK:   %d = zext i1 %9 to i32
 ; CHECK:   ret i32 %d
@@ -189,9 +189,9 @@ define i32 @icmp_slt(i64 %a, i64 %b) {
 
 ; CHECK: define i32 @icmp_ult(i32, i32, i32, i32) {
 ; CHECK:   %5 = icmp ult i32 %1, %3
-; CHECK:   %6 = icmp eq i32 %1, %3
-; CHECK:   %7 = icmp ult i32 %0, %2
-; CHECK:   %8 = and i1 %6, %7
+; CHECK:   %6 = icmp ult i32 %0, %2
+; CHECK:   %7 = icmp eq i32 %1, %3
+; CHECK:   %8 = and i1 %7, %6
 ; CHECK:   %9 = or i1 %5, %8
 ; CHECK:   %d = zext i1 %9 to i32
 ; CHECK:   ret i32 %d

From 30f7e93bffaccba51732ebb59036c9b0b91d39cf Mon Sep 17 00:00:00 2001
From: Alon Zakai <alonzakai@gmail.com>
Date: Wed, 10 Dec 2014 11:15:03 -0800
Subject: [PATCH 9/9] 1.27.2

---
 emscripten-version.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/emscripten-version.txt b/emscripten-version.txt
index b1832cf8e43..362c239d66d 100644
--- a/emscripten-version.txt
+++ b/emscripten-version.txt
@@ -1,2 +1,2 @@
-1.27.1
+1.27.2