Skip to content

Commit 5613c65

Browse files
committed
Z: Accelerate ArraysSupport.vectorizedHashCode
Adds ArraysSupport.vectorizedHashCode as a recognized method and accelerates it with a SIMD instruction sequence. The hash function is the same as String.hashCode, so the instruction generation logic shared by the two is factored out into a common helper function. Signed-off-by: Spencer Comin <spencer.comin@ibm.com>
1 parent 0e34d4c commit 5613c65

7 files changed

+178
-61
lines changed

runtime/compiler/codegen/J9CodeGenerator.hpp

+11
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,16 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz);
502502
*/
503503
void setSupportsInlineVectorizedMismatch() { _j9Flags.set(SupportsInlineVectorizedMismatch); }
504504

505+
/** \brief
506+
* Determines whether the code generator supports inlining of jdk/internal/util/ArraysSupport.vectorizedHashCode
507+
*/
508+
bool getSupportsInlineVectorizedHashCode() { return _j9Flags.testAny(SupportsInlineVectorizedHashCode); }
509+
510+
/** \brief
511+
* The code generator supports inlining of jdk/internal/util/ArraysSupport.vectorizedHashCode
512+
*/
513+
void setSupportsInlineVectorizedHashCode() { _j9Flags.set(SupportsInlineVectorizedHashCode); }
514+
505515
/**
506516
* \brief
507517
* The number of nodes between a monext and the next monent before
@@ -666,6 +676,7 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz);
666676
SupportsInlineEncodeASCII = 0x00000400,
667677
SavesNonVolatileGPRsForGC = 0x00000800,
668678
SupportsInlineVectorizedMismatch = 0x00001000,
679+
SupportsInlineVectorizedHashCode = 0x00002000,
669680
};
670681

671682
flags32_t _j9Flags;

runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,7 @@
461461
jdk_internal_misc_Unsafe_copyMemory0,
462462
jdk_internal_loader_NativeLibraries_load,
463463
jdk_internal_util_ArraysSupport_vectorizedMismatch,
464+
jdk_internal_util_ArraysSupport_vectorizedHashCode,
464465
jdk_internal_util_Preconditions_checkIndex,
465466

466467
FirstVectorMethod,

runtime/compiler/env/j9method.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -4017,6 +4017,7 @@ void TR_ResolvedJ9Method::construct()
40174017
static X ArraysSupportMethods [] =
40184018
{
40194019
{x(TR::jdk_internal_util_ArraysSupport_vectorizedMismatch, "vectorizedMismatch", "(Ljava/lang/Object;JLjava/lang/Object;JII)I")},
4020+
{x(TR::jdk_internal_util_ArraysSupport_vectorizedHashCode, "vectorizedHashCode", "(Ljava/lang/Object;IIII)I")},
40204021
{ TR::unknownMethod}
40214022
};
40224023

runtime/compiler/optimizer/InlinerTempForJ9.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -4870,6 +4870,14 @@ TR_J9InlinerPolicy::supressInliningRecognizedInitialCallee(TR_CallSite* callsite
48704870
return true;
48714871
}
48724872
break;
4873+
case TR::jdk_internal_util_ArraysSupport_vectorizedHashCode:
4874+
{
4875+
if (comp->cg()->getSupportsInlineVectorizedHashCode())
4876+
{
4877+
return true;
4878+
}
4879+
break;
4880+
}
48734881
case TR::java_lang_StringLatin1_inflate:
48744882
if (comp->cg()->getSupportsInlineStringLatin1Inflate())
48754883
{

runtime/compiler/z/codegen/J9CodeGenerator.cpp

+19
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ J9::Z::CodeGenerator::initialize()
9797
!TR::Compiler->om.canGenerateArraylets())
9898
{
9999
cg->setSupportsInlineStringHashCode();
100+
cg->setSupportsInlineVectorizedHashCode();
100101
}
101102

102103
if (cg->getSupportsVectorRegisters() && comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z14) &&
@@ -3699,6 +3700,14 @@ J9::Z::CodeGenerator::suppressInliningOfRecognizedMethod(TR::RecognizedMethod me
36993700
}
37003701
}
37013702

3703+
if (self()->getSupportsInlineVectorizedHashCode())
3704+
{
3705+
if (method == TR::jdk_internal_util_ArraysSupport_vectorizedHashCode)
3706+
{
3707+
return true;
3708+
}
3709+
}
3710+
37023711
if (method == TR::java_lang_Integer_highestOneBit ||
37033712
method == TR::java_lang_Integer_numberOfLeadingZeros ||
37043713
method == TR::java_lang_Integer_numberOfTrailingZeros ||
@@ -3930,6 +3939,16 @@ J9::Z::CodeGenerator::inlineDirectCall(
39303939
}
39313940
break;
39323941

3942+
case TR::jdk_internal_util_ArraysSupport_vectorizedHashCode:
3943+
{
3944+
if (cg->getSupportsInlineVectorizedHashCode() && node->getArgument(4)->getOpCode().isLoadConst())
3945+
{
3946+
resultReg = TR::TreeEvaluator::inlineVectorizedHashCode(node, cg);
3947+
return resultReg != NULL;
3948+
}
3949+
break;
3950+
}
3951+
39333952
case TR::java_lang_StringLatin1_inflate:
39343953
if (cg->getSupportsInlineStringLatin1Inflate())
39353954
{

runtime/compiler/z/codegen/J9TreeEvaluator.cpp

+125-49
Original file line numberDiff line numberDiff line change
@@ -2640,16 +2640,15 @@ J9::Z::TreeEvaluator::inlineUTF16BEEncodeSIMD(TR::Node *node, TR::CodeGenerator
26402640
return node->setRegister(translated);
26412641
}
26422642

2643-
TR::Register*
2644-
J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg, bool isCompressed)
2643+
static TR::Register*
2644+
hashCodeHelper(TR::Node* node, TR::CodeGenerator* cg, TR::DataType elementType, TR::Node* nodeHash, bool isSigned)
26452645
{
26462646
TR::Compilation* comp = cg->comp();
2647-
//stringSize = Number of bytes to load to process 4 characters in SIMD loop
2647+
//chunkSize = Number of bytes to load to process 4 characters in SIMD loop
26482648
//terminateVal = SIMD loop controller allowing characters in multiples of 4 to be processed by the loop
2649-
//VLLEZ instruction will load word(compressed String) or double word (decompressed String), elementSize is used for that
2650-
const short stringSize = (isCompressed ? 4 : 8);
2651-
const short terminateVal = (isCompressed ? 3 : 6);
2652-
const short elementSize = (isCompressed ? 2 : 3);
2649+
const int32_t chunkSize = 4 * TR::DataType::getSize(elementType);
2650+
const int32_t terminateVal = 3 * TR::DataType::getSize(elementType);
2651+
const bool nonZeroInitial = nodeHash != NULL && !nodeHash->isConstZeroValue();
26532652

26542653
TR::Node* nodeValue = node->getChild(0);
26552654
TR::Node* nodeIndex = node->getChild(1);
@@ -2669,8 +2668,6 @@ J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg
26692668
TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);
26702669

26712670
// Create the necessary registers
2672-
TR::Register* registerHash = cg->allocateRegister();
2673-
26742671
TR::Register* registerValue = cg->evaluate(nodeValue);
26752672
TR::Register* registerIndex = cg->gprClobberEvaluate(nodeIndex);
26762673
TR::Register* registerCount = cg->gprClobberEvaluate(nodeCount);
@@ -2687,26 +2684,45 @@ J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg
26872684

26882685
TR::Register* registerEnd = cg->allocateRegister(TR_GPR);
26892686

2687+
TR::Register* registerHash = NULL;
2688+
if(nonZeroInitial)
2689+
{
2690+
registerHash = cg->gprClobberEvaluate(nodeHash);
2691+
}
2692+
else
2693+
{
2694+
registerHash = cg->allocateRegister();
2695+
generateRREInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, registerHash, registerHash);
2696+
}
2697+
26902698
TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 12, cg);
26912699

26922700
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
26932701
cFlowRegionStart->setStartInternalControlFlow();
26942702

2695-
if(!isCompressed)
2703+
int shiftBy = 2;
2704+
switch (elementType)
26962705
{
2697-
// registerIndex *= 2 and registerCount *= 2
2698-
generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, registerIndex, registerIndex, 1);
2699-
generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, registerCount, registerCount, 1);
2706+
case TR::Int8:
2707+
break;
2708+
case TR::Int16:
2709+
// registerIndex *= 2 and registerCount *= 2
2710+
shiftBy = 1;
2711+
// intentional fallthrough
2712+
case TR::Int32:
2713+
// registerIndex *= 4 and registerCount *= 4
2714+
generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, registerIndex, registerIndex, shiftBy);
2715+
generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, registerCount, registerCount, shiftBy);
2716+
break;
2717+
default:
2718+
TR_ASSERT_FATAL(false, "Unsupported vectorizedHashCode element type");
27002719
}
27012720

27022721
// registerEnd = registerIndex + registerCount
27032722
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerEnd, generateS390MemoryReference(registerIndex, registerCount, 0, cg));
27042723

2705-
// registerHash = 0
2706-
generateRREInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, registerHash, registerHash);
2707-
2708-
// Branch to labelSerial if registerCount < stringSize
2709-
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, registerCount, static_cast<int32_t>(stringSize), TR::InstOpCode::COND_MASK4, labelSerial, false, false);
2724+
// Branch to labelSerial if registerCount < chunkSize
2725+
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, registerCount, chunkSize, TR::InstOpCode::COND_BL, labelSerial, false, false);
27102726

27112727
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelVector);
27122728

@@ -2723,33 +2739,45 @@ J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg
27232739
// registerVA = snippetData1
27242740
generateVRXInstruction(cg, TR::InstOpCode::VL, node, registerVA, memrefSnippet1);
27252741

2726-
// registerVB = 0
2742+
// registerVB = 0, 0, 0, hash
27272743
generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, registerVB, 0, 0 /*unused*/);
2744+
if (nonZeroInitial)
2745+
{
2746+
generateVRScInstruction(cg, TR::InstOpCode::VLVG, node, registerVB, registerHash, generateS390MemoryReference(3, cg), 2);
2747+
}
27282748

27292749
// ----------------- Incoming branch -----------------
27302750

27312751
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelVectorLoop);
27322752

2733-
// registerVC = 4 consecutive chars (16 bit shorts or 8 bit bytes depending on String Compression) at the current index
2734-
generateVRXInstruction(cg, TR::InstOpCode::VLLEZ, node, registerVC, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), elementSize);
2735-
2736-
if (!isCompressed)
2737-
{
2738-
// registerVC = unpack 4 (16 bit) short elements into 4 (32 bit) int elements
2739-
generateVRRaInstruction(cg, TR::InstOpCode::VUPLH, node, registerVC, registerVC, 0, 0, 1);
2740-
}
2741-
else
2753+
switch (elementType)
27422754
{
2743-
// registerVC = unpack 4 (8 bit) byte elements into 4 (32 bit) int elements
2744-
generateVRRaInstruction(cg, TR::InstOpCode::VUPLH, node, registerVC, registerVC, 0, 0, 0);
2745-
generateVRRaInstruction(cg, TR::InstOpCode::VUPLL, node, registerVC, registerVC, 0, 0, 1);
2755+
case TR::Int8:
2756+
// registerVC = 4 consecutive (8 bit) bytes at the current index
2757+
generateVRXInstruction(cg, TR::InstOpCode::VLLEZ, node, registerVC, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), 2);
2758+
// registerVC = unpack 4 (8 bit) byte elements into 4 (32 bit) int elements
2759+
generateVRRaInstruction(cg, isSigned ? TR::InstOpCode::VUPH : TR::InstOpCode::VUPLH, node, registerVC, registerVC, 0, 0, 0);
2760+
generateVRRaInstruction(cg, isSigned ? TR::InstOpCode::VUPL : TR::InstOpCode::VUPLL, node, registerVC, registerVC, 0, 0, 1);
2761+
break;
2762+
case TR::Int16:
2763+
// registerVC = 4 consecutive (16 bit) shorts at the current index
2764+
generateVRXInstruction(cg, TR::InstOpCode::VLLEZ, node, registerVC, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), 3);
2765+
// registerVC = unpack 4 (16 bit) short elements into 4 (32 bit) int elements
2766+
// The VLLEZ above (element size 3) places the four halfwords in the leftmost doubleword, so both the
// signed and the unsigned unpack must read the high half: VUPH sign-extends, VUPLH zero-extends.
generateVRRaInstruction(cg, isSigned ? TR::InstOpCode::VUPH : TR::InstOpCode::VUPLH, node, registerVC, registerVC, 0, 0, 1);
2767+
break;
2768+
case TR::Int32:
2769+
// registerVC = 4 consecutive (32 bit) ints at the current index
2770+
generateVRXInstruction(cg, TR::InstOpCode::VL, node, registerVC, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2771+
break;
2772+
default:
2773+
TR_ASSERT_FATAL(false, "Unsupported vectorizedHashCode element type");
27462774
}
27472775

27482776
// registerVB = registerVB * registerVA + registerVC
27492777
generateVRRdInstruction(cg, TR::InstOpCode::VMAL, node, registerVB, registerVB, registerVA, registerVC, 0, 2);
27502778

2751-
// registerIndex += stringSize
2752-
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerIndex, generateS390MemoryReference(registerIndex, stringSize, cg));
2779+
// registerIndex += chunkSize
2780+
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerIndex, generateS390MemoryReference(registerIndex, chunkSize, cg));
27532781

27542782
// Branch to labelVectorLoop if registerIndex < registerEnd
27552783
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalRegOpCode(), node, registerIndex, registerEnd, TR::InstOpCode::COND_MASK4, labelVectorLoop, false, false);
@@ -2801,17 +2829,26 @@ J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg
28012829
// registerTemp -= registerHash
28022830
generateRRInstruction(cg, TR::InstOpCode::getSubstractRegOpCode(), node, registerTemp, registerHash);
28032831

2804-
// registerHash = char at registerIndex
2805-
if(isCompressed)
2806-
generateRXInstruction(cg, TR::InstOpCode::LLGC, node, registerHash, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2807-
else
2808-
generateRXInstruction(cg, TR::InstOpCode::LLH, node, registerHash, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2809-
2810-
if(isCompressed) //registerIndex += 1
2811-
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerIndex, generateS390MemoryReference(registerIndex, 1, cg));
2812-
else //registerIndex += 2
2813-
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerIndex, generateS390MemoryReference(registerIndex, 2, cg));
2832+
switch (elementType)
2833+
{
2834+
case TR::Int8:
2835+
// registerHash = byte at registerIndex
2836+
generateRXInstruction(cg, isSigned ? TR::InstOpCode::LB : TR::InstOpCode::LLC, node, registerHash, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2837+
break;
2838+
case TR::Int16:
2839+
// registerHash = short at registerIndex
2840+
generateRXInstruction(cg, isSigned ? TR::InstOpCode::LH : TR::InstOpCode::LLH, node, registerHash, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2841+
break;
2842+
case TR::Int32:
2843+
// registerHash = int at registerIndex
2844+
generateRXInstruction(cg, TR::InstOpCode::L, node, registerHash, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2845+
break;
2846+
default:
2847+
TR_ASSERT_FATAL(false, "Unsupported vectorizedHashCode element type");
2848+
}
28142849

2850+
//registerIndex += element size
2851+
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerIndex, generateS390MemoryReference(registerIndex, TR::DataType::getSize(elementType), cg));
28152852

28162853
// registerHash += registerTemp
28172854
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, registerHash, registerTemp);
@@ -2834,11 +2871,6 @@ J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg
28342871
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
28352872
cFlowRegionEnd->setEndInternalControlFlow();
28362873

2837-
// Cleanup nodes before returning
2838-
cg->decReferenceCount(nodeValue);
2839-
cg->decReferenceCount(nodeIndex);
2840-
cg->decReferenceCount(nodeCount);
2841-
28422874
// Cleanup registers before returning
28432875
cg->stopUsingRegister(registerValue);
28442876
cg->stopUsingRegister(registerIndex);
@@ -2850,7 +2882,51 @@ J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg
28502882
cg->stopUsingRegister(registerVB);
28512883
cg->stopUsingRegister(registerVC);
28522884

2853-
return node->setRegister(registerHash);
2885+
node->setRegister(registerHash);
2886+
2887+
// Cleanup nodes before returning
2888+
cg->decReferenceCount(nodeValue);
2889+
cg->decReferenceCount(nodeIndex);
2890+
cg->decReferenceCount(nodeCount);
2891+
if (nodeHash)
2892+
cg->decReferenceCount(nodeHash);
2893+
2894+
return registerHash;
2895+
}
2896+
2897+
/** \brief
 *     Generates the inline hash code sequence for a String by delegating to hashCodeHelper.
 *
 *  \param node
 *     The node being evaluated; passed through to hashCodeHelper unchanged.
 *
 *  \param cg
 *     The code generator.
 *
 *  \param isCompressed
 *     true to hash 8-bit elements (TR::Int8), false to hash 16-bit elements (TR::Int16).
 *
 *  \return
 *     The register returned by hashCodeHelper.
 */
TR::Register*
J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg, bool isCompressed)
   {
   // A String hash has no initial-hash node, and its elements are loaded zero-extended.
   TR::Node* const noInitialHash = NULL;
   const bool elementsAreSigned = false;

   if (isCompressed)
      return hashCodeHelper(node, cg, TR::Int8, noInitialHash, elementsAreSigned);

   return hashCodeHelper(node, cg, TR::Int16, noInitialHash, elementsAreSigned);
   }
2902+
2903+
/** \brief
 *     Generates the inline SIMD hash sequence for jdk/internal/util/ArraysSupport.vectorizedHashCode.
 *
 *  \details
 *     Child 3 of \p node is the initial hash value and child 4 is the element type, encoded with the
 *     same constants as the type operand of the NEWARRAY bytecode. Child 4 is read with
 *     getConstValue(), so callers must only pass nodes whose child 4 is a constant load.
 *
 *  \param node
 *     The call node for vectorizedHashCode.
 *
 *  \param cg
 *     The code generator.
 *
 *  \return
 *     The register holding the computed hash, or NULL if the element type is not handled here. When
 *     NULL is returned this function has not decremented any child reference count, so the caller
 *     can still evaluate the node as an ordinary call.
 */
TR::Register*
J9::Z::TreeEvaluator::inlineVectorizedHashCode(TR::Node* node, TR::CodeGenerator* cg)
   {
   TR::Node* nodeInitialHash = node->getChild(3);
   TR::Node* nodeBasicType = node->getChild(4);
   TR::Register* registerHash = NULL;

   // The following constants come from the values for the type operand of the NEWARRAY instruction
   // See https://docs.oracle.com/javase/specs/jvms/se9/html/jvms-6.html#jvms-6.5.newarray.
   switch (nodeBasicType->getConstValue())
      {
      case 4: // T_BOOLEAN
         registerHash = hashCodeHelper(node, cg, TR::Int8, nodeInitialHash, false);
         break;
      case 8: // T_BYTE
         registerHash = hashCodeHelper(node, cg, TR::Int8, nodeInitialHash, true);
         break;
      case 5: // T_CHAR
         registerHash = hashCodeHelper(node, cg, TR::Int16, nodeInitialHash, false);
         break;
      case 9: // T_SHORT
         // short is a signed type, so its elements must be sign-extended before being folded into the
         // hash. Reusing the zero-extending T_CHAR path would produce a different result from the Java
         // implementation for negative elements, so leave registerHash NULL and let the caller fall
         // back to the out-of-line call until a sign-extending 16-bit path has been validated.
         break;
      case 10: // T_INT
         registerHash = hashCodeHelper(node, cg, TR::Int32, nodeInitialHash, false);
         break;
      default:
         // Remaining element types are not accelerated; the caller falls back to the call.
         break;
      }

   // hashCodeHelper releases children 0 through 3; the element type child is released here, and only
   // when the inline sequence was actually generated.
   if (registerHash != NULL)
      cg->decReferenceCount(nodeBasicType);

   return registerHash;
   }
28552931

28562932
TR::Register*

0 commit comments

Comments
 (0)