Skip to content
This repository was archived by the owner on Nov 1, 2021. It is now read-only.

Commit 4f1e8b8

Browse files
committed
Support for non-power-of-2 SIMD types
LLVM's optimizer generates types like <3 x float> in some cases, so we should support them. Also, this is a chance to make use of loadXYZ/storeXYZ and friends.
1 parent 9e06c5a commit 4f1e8b8

File tree

2 files changed

+121
-30
lines changed

2 files changed

+121
-30
lines changed

lib/Target/JSBackend/JSBackend.cpp

+53-30
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,8 @@ namespace {
386386
// of the compare that produced them.
387387
assert(VT->getElementType()->getPrimitiveSizeInBits() == 32 ||
388388
VT->getElementType()->getPrimitiveSizeInBits() == 1);
389-
assert(VT->getNumElements() == 4);
389+
assert(VT->getBitWidth() <= 128);
390+
assert(VT->getNumElements() <= 4);
390391
UsesSIMD = true;
391392
}
392393

@@ -1074,18 +1075,23 @@ std::string JSWriter::getConstant(const Constant* CV, AsmCast sign) {
10741075
return "0";
10751076
}
10761077
} else if (const ConstantDataVector *DV = dyn_cast<ConstantDataVector>(CV)) {
1077-
return getConstantVector(cast<VectorType>(CV->getType())->getElementType(),
1078-
getConstant(DV->getElementAsConstant(0)),
1079-
getConstant(DV->getElementAsConstant(1)),
1080-
getConstant(DV->getElementAsConstant(2)),
1081-
getConstant(DV->getElementAsConstant(3)));
1078+
unsigned NumElts = cast<VectorType>(DV->getType())->getNumElements();
1079+
Type *EltTy = cast<VectorType>(DV->getType())->getElementType();
1080+
Constant *Undef = UndefValue::get(EltTy);
1081+
return getConstantVector(EltTy,
1082+
getConstant(NumElts > 0 ? DV->getElementAsConstant(0) : Undef),
1083+
getConstant(NumElts > 1 ? DV->getElementAsConstant(1) : Undef),
1084+
getConstant(NumElts > 2 ? DV->getElementAsConstant(2) : Undef),
1085+
getConstant(NumElts > 3 ? DV->getElementAsConstant(3) : Undef));
10821086
} else if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) {
1083-
assert(V->getNumOperands() == 4);
1087+
unsigned NumElts = cast<VectorType>(CV->getType())->getNumElements();
1088+
Type *EltTy = cast<VectorType>(CV->getType())->getElementType();
1089+
Constant *Undef = UndefValue::get(EltTy);
10841090
return getConstantVector(cast<VectorType>(V->getType())->getElementType(),
1085-
getConstant(V->getOperand(0)),
1086-
getConstant(V->getOperand(1)),
1087-
getConstant(V->getOperand(2)),
1088-
getConstant(V->getOperand(3)));
1091+
getConstant(NumElts > 0 ? V->getOperand(0) : Undef),
1092+
getConstant(NumElts > 1 ? V->getOperand(1) : Undef),
1093+
getConstant(NumElts > 2 ? V->getOperand(2) : Undef),
1094+
getConstant(NumElts > 3 ? V->getOperand(3) : Undef));
10891095
} else if (const ConstantArray *CA = dyn_cast<const ConstantArray>(CV)) {
10901096
// handle things like [i8* bitcast (<{ i32, i32, i32 }>* @_ZTISt9bad_alloc to i8*)] which clang can emit for landingpads
10911097
assert(CA->getNumOperands() == 1);
@@ -1300,20 +1306,22 @@ void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw
13001306
// Check whether we can generate a SIMD.js swizzle or shuffle.
13011307
std::string A = getValueAsStr(SVI->getOperand(0));
13021308
std::string B = getValueAsStr(SVI->getOperand(1));
1303-
int Mask0 = SVI->getMaskValue(0);
1304-
int Mask1 = SVI->getMaskValue(1);
1305-
int Mask2 = SVI->getMaskValue(2);
1306-
int Mask3 = SVI->getMaskValue(3);
1309+
int OpNumElements = cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
1310+
int ResultNumElements = SVI->getType()->getNumElements();
1311+
int Mask0 = ResultNumElements > 0 ? SVI->getMaskValue(0) : -1;
1312+
int Mask1 = ResultNumElements > 1 ? SVI->getMaskValue(1) : -1;
1313+
int Mask2 = ResultNumElements > 2 ? SVI->getMaskValue(2) : -1;
1314+
int Mask3 = ResultNumElements > 3 ? SVI->getMaskValue(3) : -1;
13071315
bool swizzleA = false;
13081316
bool swizzleB = false;
1309-
if ((Mask0 < 4) && (Mask1 < 4) &&
1310-
(Mask2 < 4) && (Mask3 < 4)) {
1317+
if ((Mask0 < OpNumElements) && (Mask1 < OpNumElements) &&
1318+
(Mask2 < OpNumElements) && (Mask3 < OpNumElements)) {
13111319
swizzleA = true;
13121320
}
1313-
if ((Mask0 < 0 || (Mask0 >= 4 && Mask0 < 8)) &&
1314-
(Mask1 < 0 || (Mask1 >= 4 && Mask1 < 8)) &&
1315-
(Mask2 < 0 || (Mask2 >= 4 && Mask2 < 8)) &&
1316-
(Mask3 < 0 || (Mask3 >= 4 && Mask3 < 8))) {
1321+
if ((Mask0 < 0 || (Mask0 >= OpNumElements && Mask0 < OpNumElements * 2)) &&
1322+
(Mask1 < 0 || (Mask1 >= OpNumElements && Mask1 < OpNumElements * 2)) &&
1323+
(Mask2 < 0 || (Mask2 >= OpNumElements && Mask2 < OpNumElements * 2)) &&
1324+
(Mask3 < 0 || (Mask3 >= OpNumElements && Mask3 < OpNumElements * 2))) {
13171325
swizzleB = true;
13181326
}
13191327
assert(!(swizzleA && swizzleB));
@@ -1324,18 +1332,22 @@ void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw
13241332
} else {
13251333
Code << "SIMD_float32x4_swizzle(" << T;
13261334
}
1327-
for (unsigned int i = 0; i < 4; i++) {
1335+
int i = 0;
1336+
for (; i < ResultNumElements; ++i) {
13281337
Code << ", ";
13291338
int Mask = SVI->getMaskValue(i);
13301339
if (Mask < 0) {
13311340
Code << 0;
1332-
} else if (Mask < 4) {
1341+
} else if (Mask < OpNumElements) {
13331342
Code << Mask;
13341343
} else {
1335-
assert(Mask < 8);
1336-
Code << (Mask-4);
1344+
assert(Mask < OpNumElements * 2);
1345+
Code << (Mask-OpNumElements);
13371346
}
13381347
}
1348+
for (; i < 4; ++i) {
1349+
Code << ", 0";
1350+
}
13391351
Code << ")";
13401352
return;
13411353
}
@@ -1354,7 +1366,10 @@ void JSWriter::generateShuffleVectorExpression(const ShuffleVectorInst *SVI, raw
13541366
for (unsigned int i = 0; i < Indices.size(); ++i) {
13551367
if (i != 0)
13561368
Code << ", ";
1357-
Code << Indices[i];
1369+
int Mask = Indices[i];
1370+
if (Mask >= OpNumElements)
1371+
Mask = Mask - OpNumElements + 4;
1372+
Code << Mask;
13581373
}
13591374

13601375
Code << ")";
@@ -1626,11 +1641,15 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
16261641
const LoadInst *LI = cast<LoadInst>(I);
16271642
const Value *P = LI->getPointerOperand();
16281643
std::string PS = getValueAsStr(P);
1644+
1645+
// Determine if this is a partial load.
1646+
std::string Part = (std::string[]) { "X", "XY", "XYZ", "" }[VT->getNumElements() - 1];
1647+
16291648
Code << getAssignIfNeeded(I);
16301649
if (VT->getElementType()->isIntegerTy()) {
1631-
Code << "SIMD_int32x4_load(HEAPU8, " << PS << ")";
1650+
Code << "SIMD_int32x4_load" << Part << "(HEAPU8, " << PS << ")";
16321651
} else {
1633-
Code << "SIMD_float32x4_load(HEAPU8, " << PS << ")";
1652+
Code << "SIMD_float32x4_load" << Part << "(HEAPU8, " << PS << ")";
16341653
}
16351654
break;
16361655
}
@@ -1666,10 +1685,14 @@ bool JSWriter::generateSIMDExpression(const User *I, raw_string_ostream& Code) {
16661685
std::string PS = getOpName(P);
16671686
std::string VS = getValueAsStr(SI->getValueOperand());
16681687
Code << getAdHocAssign(PS, P->getType()) << getValueAsStr(P) << ';';
1688+
1689+
// Determine if this is a partial store.
1690+
std::string Part = (std::string[]) { "X", "XY", "XYZ", "" }[VT->getNumElements() - 1];
1691+
16691692
if (VT->getElementType()->isIntegerTy()) {
1670-
Code << "SIMD_int32x4_store(HEAPU8, " << PS << ", " << VS << ")";
1693+
Code << "SIMD_int32x4_store" << Part << "(HEAPU8, " << PS << ", " << VS << ")";
16711694
} else {
1672-
Code << "SIMD_float32x4_store(HEAPU8, " << PS << ", " << VS << ")";
1695+
Code << "SIMD_float32x4_store" << Part << "(HEAPU8, " << PS << ", " << VS << ")";
16731696
}
16741697
return true;
16751698
} else if (Operator::getOpcode(I) == Instruction::ExtractElement) {

test/CodeGen/JS/simd-loadstore.ll

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; RUN: llc < %s | FileCheck %s
2+
3+
target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:128-n32-S128"
4+
target triple = "asmjs-unknown-emscripten"
5+
6+
; CHECK: function _fx1($p) {
7+
; CHECK: $p = $p|0;
8+
; CHECK: var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0)
9+
; CHECK: $t = SIMD_float32x4_loadX(HEAPU8, $p);
10+
; CHECK: $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+0.5),Math_fround(+0),Math_fround(+0),Math_fround(+0)));
11+
; CHECK: $q = $p;SIMD_float32x4_storeX(HEAPU8, $q, $s);
12+
; CHECK: return;
13+
; CHECK: }
14+
define void @fx1(i8* %p) {
15+
%q = bitcast i8* %p to <1 x float>*
16+
%t = load <1 x float>* %q
17+
%s = fadd <1 x float> %t, <float 0.5>
18+
store <1 x float> %s, <1 x float>* %q
19+
ret void
20+
}
21+
22+
; CHECK: function _fx2($p) {
23+
; CHECK: $p = $p|0;
24+
; CHECK: var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0)
25+
; CHECK: $t = SIMD_float32x4_loadXY(HEAPU8, $p);
26+
; CHECK: $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+3.5),Math_fround(+7.5),Math_fround(+0),Math_fround(+0)));
27+
; CHECK: $q = $p;SIMD_float32x4_storeXY(HEAPU8, $q, $s);
28+
; CHECK: return;
29+
; CHECK: }
30+
define void @fx2(i8* %p) {
31+
%q = bitcast i8* %p to <2 x float>*
32+
%t = load <2 x float>* %q
33+
%s = fadd <2 x float> %t, <float 3.5, float 7.5>
34+
store <2 x float> %s, <2 x float>* %q
35+
ret void
36+
}
37+
38+
; CHECK: function _fx3($p) {
39+
; CHECK: $p = $p|0;
40+
; CHECK: var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0)
41+
; CHECK: $t = SIMD_float32x4_loadXYZ(HEAPU8, $p);
42+
; CHECK: $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+1.5),Math_fround(+4.5),Math_fround(+6.5),Math_fround(+0)));
43+
; CHECK: $q = $p;SIMD_float32x4_storeXYZ(HEAPU8, $q, $s);
44+
; CHECK: return;
45+
; CHECK: }
46+
define void @fx3(i8* %p) {
47+
%q = bitcast i8* %p to <3 x float>*
48+
%t = load <3 x float>* %q
49+
%s = fadd <3 x float> %t, <float 1.5, float 4.5, float 6.5>
50+
store <3 x float> %s, <3 x float>* %q
51+
ret void
52+
}
53+
54+
; CHECK: function _fx4($p) {
55+
; CHECK: $p = $p|0;
56+
; CHECK: var $q = 0, $s = SIMD_float32x4(0,0,0,0), $t = SIMD_float32x4(0,0,0,0)
57+
; CHECK: $t = SIMD_float32x4_load(HEAPU8, $p);
58+
; CHECK: $s = SIMD_float32x4_add($t,SIMD_float32x4(Math_fround(+9.5),Math_fround(+5.5),Math_fround(+1.5),Math_fround(+-3.5)));
59+
; CHECK: $q = $p;SIMD_float32x4_store(HEAPU8, $q, $s);
60+
; CHECK: return;
61+
; CHECK: }
62+
define void @fx4(i8* %p) {
63+
%q = bitcast i8* %p to <4 x float>*
64+
%t = load <4 x float>* %q
65+
%s = fadd <4 x float> %t, <float 9.5, float 5.5, float 1.5, float -3.5>
66+
store <4 x float> %s, <4 x float>* %q
67+
ret void
68+
}

0 commit comments

Comments
 (0)