@@ -2640,16 +2640,15 @@ J9::Z::TreeEvaluator::inlineUTF16BEEncodeSIMD(TR::Node *node, TR::CodeGenerator
2640
2640
return node->setRegister(translated);
2641
2641
}
2642
2642
2643
- TR::Register*
2644
- J9::Z::TreeEvaluator::inlineStringHashCode (TR::Node* node, TR::CodeGenerator* cg, bool isCompressed )
2643
+ static TR::Register*
2644
+ hashCodeHelper (TR::Node* node, TR::CodeGenerator* cg, TR::DataType elementType, TR::Node* nodeHash, bool isSigned )
2645
2645
{
2646
2646
TR::Compilation* comp = cg->comp();
2647
- //stringSize = Number of bytes to load to process 4 characters in SIMD loop
2647
+ //chunkSize = Number of bytes to load to process 4 characters in SIMD loop
2648
2648
//terminateVal = SIMD loop controller allowing characters in multiples of 4 to be processed by the loop
2649
- //VLLEZ instruction will load word(compressed String) or double word (decompressed String), elementSize is used for that
2650
- const short stringSize = (isCompressed ? 4 : 8);
2651
- const short terminateVal = (isCompressed ? 3 : 6);
2652
- const short elementSize = (isCompressed ? 2 : 3);
2649
+ const int32_t chunkSize = 4 * TR::DataType::getSize(elementType);
2650
+ const int32_t terminateVal = 3 * TR::DataType::getSize(elementType);
2651
+ const bool nonZeroInitial = nodeHash != NULL && !nodeHash->isConstZeroValue();
2653
2652
2654
2653
TR::Node* nodeValue = node->getChild(0);
2655
2654
TR::Node* nodeIndex = node->getChild(1);
@@ -2669,8 +2668,6 @@ J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg
2669
2668
TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);
2670
2669
2671
2670
// Create the necessary registers
2672
- TR::Register* registerHash = cg->allocateRegister();
2673
-
2674
2671
TR::Register* registerValue = cg->evaluate(nodeValue);
2675
2672
TR::Register* registerIndex = cg->gprClobberEvaluate(nodeIndex);
2676
2673
TR::Register* registerCount = cg->gprClobberEvaluate(nodeCount);
@@ -2687,26 +2684,45 @@ J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg
2687
2684
2688
2685
TR::Register* registerEnd = cg->allocateRegister(TR_GPR);
2689
2686
2687
+ TR::Register* registerHash = NULL;
2688
+ if(nonZeroInitial)
2689
+ {
2690
+ registerHash = cg->gprClobberEvaluate(nodeHash);
2691
+ }
2692
+ else
2693
+ {
2694
+ registerHash = cg->allocateRegister();
2695
+ generateRREInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, registerHash, registerHash);
2696
+ }
2697
+
2690
2698
TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 12, cg);
2691
2699
2692
2700
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
2693
2701
cFlowRegionStart->setStartInternalControlFlow();
2694
2702
2695
- if(!isCompressed)
2703
+ int shiftBy = 2;
2704
+ switch (elementType)
2696
2705
{
2697
- // registerIndex *= 2 and registerCount *= 2
2698
- generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, registerIndex, registerIndex, 1);
2699
- generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, registerCount, registerCount, 1);
2706
+ case TR::Int8:
2707
+ break;
2708
+ case TR::Int16:
2709
+ // registerIndex *= 2 and registerCount *= 2
2710
+ shiftBy = 1;
2711
+ // intentional fallthrough
2712
+ case TR::Int32:
2713
+ // registerIndex *= 4 and registerCount *= 4
2714
+ generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, registerIndex, registerIndex, shiftBy);
2715
+ generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, registerCount, registerCount, shiftBy);
2716
+ break;
2717
+ default:
2718
+ TR_ASSERT_FATAL(false, "Unsupported vectorizedHashCode element type");
2700
2719
}
2701
2720
2702
2721
// registerEnd = registerIndex + registerCount
2703
2722
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerEnd, generateS390MemoryReference(registerIndex, registerCount, 0, cg));
2704
2723
2705
- // registerHash = 0
2706
- generateRREInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, registerHash, registerHash);
2707
-
2708
- // Branch to labelSerial if registerCount < stringSize
2709
- generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, registerCount, static_cast<int32_t>(stringSize), TR::InstOpCode::COND_MASK4, labelSerial, false, false);
2724
+ // Branch to labelSerial if registerCount < chunkSize
2725
+ generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, registerCount, chunkSize, TR::InstOpCode::COND_BL, labelSerial, false, false);
2710
2726
2711
2727
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelVector);
2712
2728
@@ -2723,33 +2739,45 @@ J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg
2723
2739
// registerVA = snippetData1
2724
2740
generateVRXInstruction(cg, TR::InstOpCode::VL, node, registerVA, memrefSnippet1);
2725
2741
2726
- // registerVB = 0
2742
+ // registerVB = 0, 0, 0, hash
2727
2743
generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, registerVB, 0, 0 /*unused*/);
2744
+ if (nonZeroInitial)
2745
+ {
2746
+ generateVRScInstruction(cg, TR::InstOpCode::VLVG, node, registerVB, registerHash, generateS390MemoryReference(3, cg), 2);
2747
+ }
2728
2748
2729
2749
// ----------------- Incoming branch -----------------
2730
2750
2731
2751
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelVectorLoop);
2732
2752
2733
- // registerVC = 4 consecutive chars (16 bit shorts or 8 bit bytes depending on String Compression) at the current index
2734
- generateVRXInstruction(cg, TR::InstOpCode::VLLEZ, node, registerVC, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), elementSize);
2735
-
2736
- if (!isCompressed)
2737
- {
2738
- // registerVC = unpack 4 (16 bit) short elements into 4 (32 bit) int elements
2739
- generateVRRaInstruction(cg, TR::InstOpCode::VUPLH, node, registerVC, registerVC, 0, 0, 1);
2740
- }
2741
- else
2753
+ switch (elementType)
2742
2754
{
2743
- // registerVC = unpack 4 (8 bit) byte elements into 4 (32 bit) int elements
2744
- generateVRRaInstruction(cg, TR::InstOpCode::VUPLH, node, registerVC, registerVC, 0, 0, 0);
2745
- generateVRRaInstruction(cg, TR::InstOpCode::VUPLL, node, registerVC, registerVC, 0, 0, 1);
2755
+ case TR::Int8:
2756
+ // registerVC = 4 consecutive (8 bit) bytes at the current index
2757
+ generateVRXInstruction(cg, TR::InstOpCode::VLLEZ, node, registerVC, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), 2);
2758
+ // registerVC = unpack 4 (8 bit) byte elements into 4 (32 bit) int elements
2759
+ generateVRRaInstruction(cg, isSigned ? TR::InstOpCode::VUPH : TR::InstOpCode::VUPLH, node, registerVC, registerVC, 0, 0, 0);
2760
+ generateVRRaInstruction(cg, isSigned ? TR::InstOpCode::VUPL : TR::InstOpCode::VUPLL, node, registerVC, registerVC, 0, 0, 1);
2761
+ break;
2762
+ case TR::Int16:
2763
+ // registerVC = 4 consecutive (16 bit) shorts at the current index
2764
+ generateVRXInstruction(cg, TR::InstOpCode::VLLEZ, node, registerVC, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), 3);
2765
+ // registerVC = unpack 4 (16 bit) short elements into 4 (32 bit) int elements
2766
+ generateVRRaInstruction(cg, isSigned ? TR::InstOpCode::VUPL : TR::InstOpCode::VUPLH, node, registerVC, registerVC, 0, 0, 1);
2767
+ break;
2768
+ case TR::Int32:
2769
+ // registerVC = 4 consecutive (32 bit) ints at the current index
2770
+ generateVRXInstruction(cg, TR::InstOpCode::VL, node, registerVC, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2771
+ break;
2772
+ default:
2773
+ TR_ASSERT_FATAL(false, "Unsupported vectorizedHashCode element type");
2746
2774
}
2747
2775
2748
2776
// registerVB = registerVB * registerVA + registerVC
2749
2777
generateVRRdInstruction(cg, TR::InstOpCode::VMAL, node, registerVB, registerVB, registerVA, registerVC, 0, 2);
2750
2778
2751
- // registerIndex += stringSize
2752
- generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerIndex, generateS390MemoryReference(registerIndex, stringSize , cg));
2779
+ // registerIndex += chunkSize
2780
+ generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerIndex, generateS390MemoryReference(registerIndex, chunkSize , cg));
2753
2781
2754
2782
// Branch to labelVectorLoop if registerIndex < registerEnd
2755
2783
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalRegOpCode(), node, registerIndex, registerEnd, TR::InstOpCode::COND_MASK4, labelVectorLoop, false, false);
@@ -2801,17 +2829,26 @@ J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg
2801
2829
// registerTemp -= registerHash
2802
2830
generateRRInstruction(cg, TR::InstOpCode::getSubstractRegOpCode(), node, registerTemp, registerHash);
2803
2831
2804
- // registerHash = char at registerIndex
2805
- if(isCompressed)
2806
- generateRXInstruction(cg, TR::InstOpCode::LLGC, node, registerHash, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2807
- else
2808
- generateRXInstruction(cg, TR::InstOpCode::LLH, node, registerHash, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2809
-
2810
- if(isCompressed) //registerIndex += 1
2811
- generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerIndex, generateS390MemoryReference(registerIndex, 1, cg));
2812
- else //registerIndex += 2
2813
- generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerIndex, generateS390MemoryReference(registerIndex, 2, cg));
2832
+ switch (elementType)
2833
+ {
2834
+ case TR::Int8:
2835
+ // registerHash = byte at registerIndex
2836
+ generateRXInstruction(cg, isSigned ? TR::InstOpCode::LB : TR::InstOpCode::LLC, node, registerHash, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2837
+ break;
2838
+ case TR::Int16:
2839
+ // registerHash = short at registerIndex
2840
+ generateRXInstruction(cg, isSigned ? TR::InstOpCode::LH : TR::InstOpCode::LLH, node, registerHash, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2841
+ break;
2842
+ case TR::Int32:
2843
+ // registerHash = int at registerIndex
2844
+ generateRXInstruction(cg, TR::InstOpCode::L, node, registerHash, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2845
+ break;
2846
+ default:
2847
+ TR_ASSERT_FATAL(false, "Unsupported vectorizedHashCode element type");
2848
+ }
2814
2849
2850
+ //registerIndex += element size
2851
+ generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerIndex, generateS390MemoryReference(registerIndex, TR::DataType::getSize(elementType), cg));
2815
2852
2816
2853
// registerHash += registerTemp
2817
2854
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, registerHash, registerTemp);
@@ -2834,11 +2871,6 @@ J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg
2834
2871
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
2835
2872
cFlowRegionEnd->setEndInternalControlFlow();
2836
2873
2837
- // Cleanup nodes before returning
2838
- cg->decReferenceCount(nodeValue);
2839
- cg->decReferenceCount(nodeIndex);
2840
- cg->decReferenceCount(nodeCount);
2841
-
2842
2874
// Cleanup registers before returning
2843
2875
cg->stopUsingRegister(registerValue);
2844
2876
cg->stopUsingRegister(registerIndex);
@@ -2850,7 +2882,51 @@ J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg
2850
2882
cg->stopUsingRegister(registerVB);
2851
2883
cg->stopUsingRegister(registerVC);
2852
2884
2853
- return node->setRegister(registerHash);
2885
+ node->setRegister(registerHash);
2886
+
2887
+ // Cleanup nodes before returning
2888
+ cg->decReferenceCount(nodeValue);
2889
+ cg->decReferenceCount(nodeIndex);
2890
+ cg->decReferenceCount(nodeCount);
2891
+ if (nodeHash)
2892
+ cg->decReferenceCount(nodeHash);
2893
+
2894
+ return registerHash;
2895
+ }
2896
+
2897
+ TR::Register*
2898
+ J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg, bool isCompressed)
2899
+ {
2900
+ return hashCodeHelper(node, cg, isCompressed ? TR::Int8 : TR::Int16, NULL, false);
2901
+ }
2902
+
2903
+ TR::Register*
2904
+ J9::Z::TreeEvaluator::inlineVectorizedHashCode(TR::Node* node, TR::CodeGenerator* cg)
2905
+ {
2906
+ TR::Register* registerHash = NULL;
2907
+
2908
+ switch (node->getChild(4)->getConstValue())
2909
+ {
2910
+ // The following constants come from the values for the type operand of the NEWARRAY instruction
2911
+ // See https://docs.oracle.com/javase/specs/jvms/se9/html/jvms-6.html#jvms-6.5.newarray.
2912
+ case 4: // T_BOOLEAN
2913
+ registerHash = hashCodeHelper(node, cg, TR::Int8, node->getChild(3), false);
2914
+ break;
2915
+ case 8: // T_BYTE
2916
+ registerHash = hashCodeHelper(node, cg, TR::Int8, node->getChild(3), true);
2917
+ break;
2918
+ case 5: // T_CHAR
2919
+ case 9: // T_SHORT
2920
+ registerHash = hashCodeHelper(node, cg, TR::Int16, node->getChild(3), false);
2921
+ break;
2922
+ case 10: // T_INT
2923
+ registerHash = hashCodeHelper(node, cg, TR::Int32, node->getChild(3), false);
2924
+ break;
2925
+ }
2926
+ if (registerHash != NULL)
2927
+ cg->decReferenceCount(node->getChild(4));
2928
+
2929
+ return registerHash;
2854
2930
}
2855
2931
2856
2932
TR::Register*
0 commit comments