diff --git a/Backport-JDK-8319716-8327283-RISC-V-Add-SHA-2.patch b/Backport-JDK-8319716-8327283-RISC-V-Add-SHA-2.patch new file mode 100644 index 0000000000000000000000000000000000000000..16763dc8a0b363c01d888167f340e636207195cf --- /dev/null +++ b/Backport-JDK-8319716-8327283-RISC-V-Add-SHA-2.patch @@ -0,0 +1,920 @@ +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 24de7c15f..24e5f4fa8 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -1312,6 +1312,7 @@ enum VectorMask { + INSN(vsll_vi, 0b1010111, 0b011, 0b100101); + + // Vector Slide Instructions ++ INSN(vslideup_vi, 0b1010111, 0b011, 0b001110); + INSN(vslidedown_vi, 0b1010111, 0b011, 0b001111); + + #undef INSN +@@ -1666,7 +1667,6 @@ enum VectorMask { + INSN(vmv_v_x, 0b1010111, 0b100, v0, 0b1, 0b010111); + + #undef INSN +-#undef patch_VArith + + #define INSN(NAME, op, funct13, funct6) \ + void NAME(VectorRegister Vd, VectorMask vm = unmasked) { \ +@@ -1708,14 +1708,29 @@ enum Nf { + patch_reg((address)&insn, 15, Rs1); \ + emit(insn) + +-#define INSN(NAME, op, lumop, vm, mop, nf) \ +- void NAME(VectorRegister Vd, Register Rs1, uint32_t width = 0, bool mew = false) { \ ++#define INSN(NAME, op, width, lumop, vm, mop, mew, nf) \ ++ void NAME(VectorRegister Vd, Register Rs1) { \ + guarantee(is_uimm3(width), "width is invalid"); \ + patch_VLdSt(op, Vd, width, Rs1, lumop, vm, mop, mew, nf); \ + } + + // Vector Load/Store Instructions +- INSN(vl1re8_v, 0b0000111, 0b01000, 0b1, 0b00, g1); ++ INSN(vl1re8_v, 0b0000111, 0b000, 0b01000, 0b1, 0b00, 0b0, g1); ++ INSN(vl1re16_v, 0b0000111, 0b101, 0b01000, 0b1, 0b00, 0b0, g1); ++ INSN(vl1re32_v, 0b0000111, 0b110, 0b01000, 0b1, 0b00, 0b0, g1); ++ INSN(vl1re64_v, 0b0000111, 0b111, 0b01000, 0b1, 0b00, 0b0, g1); ++ INSN(vl2re8_v, 0b0000111, 0b000, 0b01000, 0b1, 0b00, 0b0, g2); ++ INSN(vl2re16_v, 0b0000111, 0b101, 0b01000, 0b1, 0b00, 0b0, g2); ++ INSN(vl2re32_v, 0b0000111, 0b110, 0b01000, 0b1, 0b00, 0b0, g2); ++ INSN(vl2re64_v, 0b0000111, 0b111, 0b01000, 0b1, 0b00, 0b0, g2); ++ INSN(vl4re8_v, 0b0000111, 0b000, 0b01000, 0b1, 0b00, 0b0, g4); ++ INSN(vl4re16_v, 0b0000111, 0b101, 0b01000, 0b1, 0b00, 0b0, g4); ++ INSN(vl4re32_v, 0b0000111, 0b110, 0b01000, 0b1, 0b00, 0b0, g4); ++ INSN(vl4re64_v, 0b0000111, 0b111, 0b01000, 0b1, 0b00, 0b0, g4); ++ INSN(vl8re8_v, 0b0000111, 0b000, 0b01000, 0b1, 0b00, 0b0, g8); ++ INSN(vl8re16_v, 0b0000111, 0b101, 0b01000, 0b1, 0b00, 0b0, g8); ++ INSN(vl8re32_v, 0b0000111, 0b110, 0b01000, 0b1, 0b00, 0b0, g8); ++ INSN(vl8re64_v, 0b0000111, 0b111, 0b01000, 0b1, 0b00, 0b0, g8); + + #undef INSN + +@@ -1726,6 +1741,9 @@ enum Nf { + + // Vector Load/Store Instructions + INSN(vs1r_v, 0b0100111, 0b000, 0b01000, 0b1, 0b00, 0b0, g1); ++ INSN(vs2r_v, 0b0100111, 0b000, 0b01000, 0b1, 0b00, 0b0, g2); ++ INSN(vs4r_v, 0b0100111, 0b000, 0b01000, 0b1, 0b00, 0b0, g4); ++ INSN(vs8r_v, 0b0100111, 0b000, 0b01000, 0b1, 0b00, 0b0, g8); + + #undef INSN + +@@ -1771,10 +1789,12 @@ enum Nf { + } + + // Vector unordered indexed load instructions ++ INSN( vluxei8_v, 0b0000111, 0b000, 0b01, 0b0); + INSN(vluxei32_v, 0b0000111, 0b110, 0b01, 0b0); + INSN(vluxei64_v, 0b0000111, 0b111, 0b01, 0b0); + + // Vector unordered indexed store instructions ++ INSN( vsuxei8_v, 0b0100111, 0b000, 0b01, 0b0); + INSN(vsuxei32_v, 0b0100111, 0b110, 0b01, 0b0); + INSN(vsuxei64_v, 0b0100111, 0b111, 0b01, 0b0); + +@@ -1794,6 +1814,55 @@ enum Nf { + #undef INSN + #undef patch_VLdSt + ++// ==================================== 
++// RISC-V Vector Crypto Extension ++// ==================================== ++ ++#define INSN(NAME, op, funct3, funct6) \ ++ void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1, VectorMask vm = unmasked) { \ ++ patch_VArith(op, Vd, funct3, Vs1->raw_encoding(), Vs2, vm, funct6); \ ++ } ++ ++ // Vector Bit-manipulation used in Cryptography (Zvkb) Extension ++ INSN(vandn_vv, 0b1010111, 0b000, 0b000001); ++ INSN(vandn_vx, 0b1010111, 0b100, 0b000001); ++ INSN(vandn_vi, 0b1010111, 0b011, 0b000001); ++ INSN(vclmul_vv, 0b1010111, 0b010, 0b001100); ++ INSN(vclmul_vx, 0b1010111, 0b110, 0b001100); ++ INSN(vclmulh_vv, 0b1010111, 0b010, 0b001101); ++ INSN(vclmulh_vx, 0b1010111, 0b110, 0b001101); ++ INSN(vror_vv, 0b1010111, 0b000, 0b010100); ++ INSN(vror_vx, 0b1010111, 0b100, 0b010100); ++ INSN(vrol_vv, 0b1010111, 0b000, 0b010101); ++ INSN(vrol_vx, 0b1010111, 0b100, 0b010101); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, Vs1, funct6) \ ++ void NAME(VectorRegister Vd, VectorRegister Vs2, VectorMask vm = unmasked) { \ ++ patch_VArith(op, Vd, funct3, Vs1, Vs2, vm, funct6); \ ++ } ++ ++ // Vector Bit-manipulation used in Cryptography (Zvkb) Extension ++ INSN(vbrev8_v, 0b1010111, 0b010, 0b01000, 0b010010); ++ INSN(vrev8_v, 0b1010111, 0b010, 0b01001, 0b010010); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, vm, funct6) \ ++ void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1) { \ ++ patch_VArith(op, Vd, funct3, Vs1->raw_encoding(), Vs2, vm, funct6); \ ++ } ++ ++ // Vector SHA-2 Secure Hash (Zvknh[ab]) Extension ++ INSN(vsha2ms_vv, 0b1110111, 0b010, 0b1, 0b101101); ++ INSN(vsha2ch_vv, 0b1110111, 0b010, 0b1, 0b101110); ++ INSN(vsha2cl_vv, 0b1110111, 0b010, 0b1, 0b101111); ++ ++#undef INSN ++ ++#undef patch_VArith ++ + // ==================================== + // RISC-V Bit-Manipulation Extension + // Currently only support Zba, Zbb and Zbs bitmanip extensions. 
+diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index 2ee0b4b94..4d39d9905 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -112,6 +112,8 @@ define_pd_global(intx, InlineSmallCode, 1000); + product(bool, UseZicboz, false, EXPERIMENTAL, "Use Zicboz instructions") \ + product(bool, UseZihintpause, false, EXPERIMENTAL, \ + "Use Zihintpause instructions") \ ++ product(bool, UseZvkn, false, EXPERIMENTAL, \ ++ "Use Zvkn group extension, Zvkned, Zvknhb, Zvkb, Zvkt") \ + product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ + "Use RVV instructions for left/right shift of BigInteger") + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 08c953b2c..77cc5e056 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -1331,6 +1331,16 @@ public: + vmfle_vv(vd, vs1, vs2, vm); + } + ++ inline void vmsltu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) { ++ guarantee(imm >= 1 && imm <= 16, "imm is invalid"); ++ vmsleu_vi(Vd, Vs2, imm-1, vm); ++ } ++ ++ inline void vmsgeu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) { ++ guarantee(imm >= 1 && imm <= 16, "imm is invalid"); ++ vmsgtu_vi(Vd, Vs2, imm-1, vm); ++ } ++ + // Copy mask register + inline void vmmv_m(VectorRegister vd, VectorRegister vs) { + vmand_mm(vd, vs, vs); +@@ -1346,6 +1356,10 @@ public: + vmxnor_mm(vd, vd, vd); + } + ++ inline void vnot_v(VectorRegister Vd, VectorRegister Vs, VectorMask vm = unmasked) { ++ vxor_vi(Vd, Vs, -1, vm); ++ } ++ + static const int zero_words_block_size; + + void cast_primitive_type(BasicType type, Register Rt) { +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index 8c5e1c097..dec9a8464 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -3715,118 +3715,8 @@ class StubGenerator: public StubCodeGenerator { + return entry; + } + }; +-#endif // COMPILER2 +- +- // Continuation point for throwing of implicit exceptions that are +- // not handled in the current activation. Fabricates an exception +- // oop and initiates normal exception dispatching in this +- // frame. Since we need to preserve callee-saved values (currently +- // only for C2, but done for C1 as well) we need a callee-saved oop +- // map and therefore have to make these stubs into RuntimeStubs +- // rather than BufferBlobs. If the compiler needs all registers to +- // be preserved between the fault point and the exception handler +- // then it must assume responsibility for that in +- // AbstractCompiler::continuation_for_implicit_null_exception or +- // continuation_for_implicit_division_by_zero_exception. All other +- // implicit exceptions (e.g., NullPointerException or +- // AbstractMethodError on entry) are either at call sites or +- // otherwise assume that stack unwinding will be initiated, so +- // caller saved registers were assumed volatile in the compiler. +- +-#undef __ +-#define __ masm-> +- +- address generate_throw_exception(const char* name, +- address runtime_entry, +- Register arg1 = noreg, +- Register arg2 = noreg) { +- // Information about frame layout at time of blocking runtime call. 
+- // Note that we only have to preserve callee-saved registers since +- // the compilers are responsible for supplying a continuation point +- // if they expect all registers to be preserved. +- // n.b. riscv asserts that frame::arg_reg_save_area_bytes == 0 +- assert_cond(runtime_entry != nullptr); +- enum layout { +- fp_off = 0, +- fp_off2, +- return_off, +- return_off2, +- framesize // inclusive of return address +- }; +- +- const int insts_size = 1024; +- const int locs_size = 64; +- +- CodeBuffer code(name, insts_size, locs_size); +- OopMapSet* oop_maps = new OopMapSet(); +- MacroAssembler* masm = new MacroAssembler(&code); +- assert_cond(oop_maps != nullptr && masm != nullptr); +- +- address start = __ pc(); +- +- // This is an inlined and slightly modified version of call_VM +- // which has the ability to fetch the return PC out of +- // thread-local storage and also sets up last_Java_sp slightly +- // differently than the real call_VM +- +- __ enter(); // Save FP and RA before call +- +- assert(is_even(framesize / 2), "sp not 16-byte aligned"); +- +- // ra and fp are already in place +- __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog +- +- int frame_complete = __ pc() - start; +- +- // Set up last_Java_sp and last_Java_fp +- address the_pc = __ pc(); +- __ set_last_Java_frame(sp, fp, the_pc, t0); +- +- // Call runtime +- if (arg1 != noreg) { +- assert(arg2 != c_rarg1, "clobbered"); +- __ mv(c_rarg1, arg1); +- } +- if (arg2 != noreg) { +- __ mv(c_rarg2, arg2); +- } +- __ mv(c_rarg0, xthread); +- BLOCK_COMMENT("call runtime_entry"); +- __ call(runtime_entry); + +- // Generate oop map +- OopMap* map = new OopMap(framesize, 0); +- assert_cond(map != nullptr); +- +- oop_maps->add_gc_map(the_pc - start, map); +- +- __ reset_last_Java_frame(true); +- +- __ leave(); +- +- // check for pending exceptions +-#ifdef ASSERT +- Label L; +- __ ld(t0, Address(xthread, Thread::pending_exception_offset())); +- __ bnez(t0, L); +- __ should_not_reach_here(); +- __ bind(L); +-#endif // ASSERT +- __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); +- +- // codeBlob framesize is in words (not VMRegImpl::slot_size) +- RuntimeStub* stub = +- RuntimeStub::new_runtime_stub(name, +- &code, +- frame_complete, +- (framesize >> (LogBytesPerWord - LogBytesPerInt)), +- oop_maps, false); +- assert(stub != nullptr, "create runtime stub fail!"); +- return stub->entry_point(); +- } +- +-#undef __ +-#define __ _masm-> ++#endif // COMPILER2 + + address generate_cont_thaw(Continuation::thaw_kind kind) { + bool return_barrier = Continuation::is_thaw_return_barrier(kind); +@@ -3970,6 +3860,395 @@ class StubGenerator: public StubCodeGenerator { + return start; + } + ++#if COMPILER2_OR_JVMCI ++ ++#undef __ ++#define __ this-> ++ ++ class Sha2Generator : public MacroAssembler { ++ StubCodeGenerator* _cgen; ++ public: ++ Sha2Generator(MacroAssembler* masm, StubCodeGenerator* cgen) : MacroAssembler(masm->code()), _cgen(cgen) {} ++ address generate_sha256_implCompress(bool multi_block) { ++ return generate_sha2_implCompress(Assembler::e32, multi_block); ++ } ++ address generate_sha512_implCompress(bool multi_block) { ++ return generate_sha2_implCompress(Assembler::e64, multi_block); ++ } ++ private: ++ ++ void vleXX_v(Assembler::SEW vset_sew, VectorRegister vr, Register sr) { ++ if (vset_sew == Assembler::e32) __ vle32_v(vr, sr); ++ else __ vle64_v(vr, sr); ++ } ++ ++ void vseXX_v(Assembler::SEW vset_sew, VectorRegister vr, Register sr) { ++ if (vset_sew == Assembler::e32) __ vse32_v(vr, 
sr); ++ else __ vse64_v(vr, sr); ++ } ++ ++ // Overview of the logic in each "quad round". ++ // ++ // The code below repeats 16/20 times the logic implementing four rounds ++ // of the SHA-256/512 core loop as documented by NIST. 16/20 "quad rounds" ++ // to implementing the 64/80 single rounds. ++ // ++ // // Load four word (u32/64) constants (K[t+3], K[t+2], K[t+1], K[t+0]) ++ // // Output: ++ // // vTmp1 = {K[t+3], K[t+2], K[t+1], K[t+0]} ++ // vl1reXX.v vTmp1, ofs ++ // ++ // // Increment word constant address by stride (16/32 bytes, 4*4B/8B, 128b/256b) ++ // addi ofs, ofs, 16/32 ++ // ++ // // Add constants to message schedule words: ++ // // Input ++ // // vTmp1 = {K[t+3], K[t+2], K[t+1], K[t+0]} ++ // // vW0 = {W[t+3], W[t+2], W[t+1], W[t+0]}; // Vt0 = W[3:0]; ++ // // Output ++ // // vTmp0 = {W[t+3]+K[t+3], W[t+2]+K[t+2], W[t+1]+K[t+1], W[t+0]+K[t+0]} ++ // vadd.vv vTmp0, vTmp1, vW0 ++ // ++ // // 2 rounds of working variables updates. ++ // // vState1[t+4] <- vState1[t], vState0[t], vTmp0[t] ++ // // Input: ++ // // vState1 = {c[t],d[t],g[t],h[t]} " = vState1[t] " ++ // // vState0 = {a[t],b[t],e[t],f[t]} ++ // // vTmp0 = {W[t+3]+K[t+3], W[t+2]+K[t+2], W[t+1]+K[t+1], W[t+0]+K[t+0]} ++ // // Output: ++ // // vState1 = {f[t+2],e[t+2],b[t+2],a[t+2]} " = vState0[t+2] " ++ // // = {h[t+4],g[t+4],d[t+4],c[t+4]} " = vState1[t+4] " ++ // vsha2cl.vv vState1, vState0, vTmp0 ++ // ++ // // 2 rounds of working variables updates. ++ // // vState0[t+4] <- vState0[t], vState0[t+2], vTmp0[t] ++ // // Input ++ // // vState0 = {a[t],b[t],e[t],f[t]} " = vState0[t] " ++ // // = {h[t+2],g[t+2],d[t+2],c[t+2]} " = vState1[t+2] " ++ // // vState1 = {f[t+2],e[t+2],b[t+2],a[t+2]} " = vState0[t+2] " ++ // // vTmp0 = {W[t+3]+K[t+3], W[t+2]+K[t+2], W[t+1]+K[t+1], W[t+0]+K[t+0]} ++ // // Output: ++ // // vState0 = {f[t+4],e[t+4],b[t+4],a[t+4]} " = vState0[t+4] " ++ // vsha2ch.vv vState0, vState1, vTmp0 ++ // ++ // // Combine 2QW into 1QW ++ // // ++ // // To generate the next 4 words, "new_vW0"/"vTmp0" from vW0-vW3, vsha2ms needs ++ // // vW0[0..3], vW1[0], vW2[1..3], vW3[0, 2..3] ++ // // and it can only take 3 vectors as inputs. Hence we need to combine ++ // // vW1[0] and vW2[1..3] in a single vector. 
++ // // ++ // // vmerge Vt4, Vt1, Vt2, V0 ++ // // Input ++ // // V0 = mask // first word from vW2, 1..3 words from vW1 ++ // // vW2 = {Wt-8, Wt-7, Wt-6, Wt-5} ++ // // vW1 = {Wt-12, Wt-11, Wt-10, Wt-9} ++ // // Output ++ // // Vt4 = {Wt-12, Wt-7, Wt-6, Wt-5} ++ // vmerge.vvm vTmp0, vW2, vW1, v0 ++ // ++ // // Generate next Four Message Schedule Words (hence allowing for 4 more rounds) ++ // // Input ++ // // vW0 = {W[t+ 3], W[t+ 2], W[t+ 1], W[t+ 0]} W[ 3: 0] ++ // // vW3 = {W[t+15], W[t+14], W[t+13], W[t+12]} W[15:12] ++ // // vTmp0 = {W[t+11], W[t+10], W[t+ 9], W[t+ 4]} W[11: 9,4] ++ // // Output (next four message schedule words) ++ // // vW0 = {W[t+19], W[t+18], W[t+17], W[t+16]} W[19:16] ++ // vsha2ms.vv vW0, vTmp0, vW3 ++ // ++ // BEFORE ++ // vW0 - vW3 hold the message schedule words (initially the block words) ++ // vW0 = W[ 3: 0] "oldest" ++ // vW1 = W[ 7: 4] ++ // vW2 = W[11: 8] ++ // vW3 = W[15:12] "newest" ++ // ++ // vt6 - vt7 hold the working state variables ++ // vState0 = {a[t],b[t],e[t],f[t]} // initially {H5,H4,H1,H0} ++ // vState1 = {c[t],d[t],g[t],h[t]} // initially {H7,H6,H3,H2} ++ // ++ // AFTER ++ // vW0 - vW3 hold the message schedule words (initially the block words) ++ // vW1 = W[ 7: 4] "oldest" ++ // vW2 = W[11: 8] ++ // vW3 = W[15:12] ++ // vW0 = W[19:16] "newest" ++ // ++ // vState0 and vState1 hold the working state variables ++ // vState0 = {a[t+4],b[t+4],e[t+4],f[t+4]} ++ // vState1 = {c[t+4],d[t+4],g[t+4],h[t+4]} ++ // ++ // The group of vectors vW0,vW1,vW2,vW3 is "rotated" by one in each quad-round, ++ // hence the uses of those vectors rotate in each round, and we get back to the ++ // initial configuration every 4 quad-rounds. We could avoid those changes at ++ // the cost of moving those vectors at the end of each quad-rounds. ++ void sha2_quad_round(Assembler::SEW vset_sew, VectorRegister rot1, VectorRegister rot2, VectorRegister rot3, VectorRegister rot4, ++ Register scalarconst, VectorRegister vtemp, VectorRegister vtemp2, VectorRegister v_abef, VectorRegister v_cdgh, ++ bool gen_words = true, bool step_const = true) { ++ __ vleXX_v(vset_sew, vtemp, scalarconst); ++ if (step_const) { ++ __ addi(scalarconst, scalarconst, vset_sew == Assembler::e32 ? 
16 : 32); ++ } ++ __ vadd_vv(vtemp2, vtemp, rot1); ++ __ vsha2cl_vv(v_cdgh, v_abef, vtemp2); ++ __ vsha2ch_vv(v_abef, v_cdgh, vtemp2); ++ if (gen_words) { ++ __ vmerge_vvm(vtemp2, rot3, rot2); ++ __ vsha2ms_vv(rot1, vtemp2, rot4); ++ } ++ } ++ ++ const char* stub_name(Assembler::SEW vset_sew, bool multi_block) { ++ if (vset_sew == Assembler::e32 && !multi_block) return "sha256_implCompress"; ++ if (vset_sew == Assembler::e32 && multi_block) return "sha256_implCompressMB"; ++ if (vset_sew == Assembler::e64 && !multi_block) return "sha512_implCompress"; ++ if (vset_sew == Assembler::e64 && multi_block) return "sha512_implCompressMB"; ++ ShouldNotReachHere(); ++ return "bad name lookup"; ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // c_rarg0 - byte[] source+offset ++ // c_rarg1 - int[] SHA.state ++ // c_rarg2 - int offset ++ // c_rarg3 - int limit ++ // ++ address generate_sha2_implCompress(Assembler::SEW vset_sew, bool multi_block) { ++ alignas(64) static const uint32_t round_consts_256[64] = { ++ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, ++ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, ++ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, ++ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, ++ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, ++ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, ++ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, ++ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, ++ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, ++ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, ++ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, ++ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, ++ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, ++ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, ++ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, ++ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, ++ }; ++ alignas(64) static const uint64_t round_consts_512[80] = { ++ 0x428a2f98d728ae22l, 0x7137449123ef65cdl, 0xb5c0fbcfec4d3b2fl, ++ 0xe9b5dba58189dbbcl, 0x3956c25bf348b538l, 0x59f111f1b605d019l, ++ 0x923f82a4af194f9bl, 0xab1c5ed5da6d8118l, 0xd807aa98a3030242l, ++ 0x12835b0145706fbel, 0x243185be4ee4b28cl, 0x550c7dc3d5ffb4e2l, ++ 0x72be5d74f27b896fl, 0x80deb1fe3b1696b1l, 0x9bdc06a725c71235l, ++ 0xc19bf174cf692694l, 0xe49b69c19ef14ad2l, 0xefbe4786384f25e3l, ++ 0x0fc19dc68b8cd5b5l, 0x240ca1cc77ac9c65l, 0x2de92c6f592b0275l, ++ 0x4a7484aa6ea6e483l, 0x5cb0a9dcbd41fbd4l, 0x76f988da831153b5l, ++ 0x983e5152ee66dfabl, 0xa831c66d2db43210l, 0xb00327c898fb213fl, ++ 0xbf597fc7beef0ee4l, 0xc6e00bf33da88fc2l, 0xd5a79147930aa725l, ++ 0x06ca6351e003826fl, 0x142929670a0e6e70l, 0x27b70a8546d22ffcl, ++ 0x2e1b21385c26c926l, 0x4d2c6dfc5ac42aedl, 0x53380d139d95b3dfl, ++ 0x650a73548baf63del, 0x766a0abb3c77b2a8l, 0x81c2c92e47edaee6l, ++ 0x92722c851482353bl, 0xa2bfe8a14cf10364l, 0xa81a664bbc423001l, ++ 0xc24b8b70d0f89791l, 0xc76c51a30654be30l, 0xd192e819d6ef5218l, ++ 0xd69906245565a910l, 0xf40e35855771202al, 0x106aa07032bbd1b8l, ++ 0x19a4c116b8d2d0c8l, 0x1e376c085141ab53l, 0x2748774cdf8eeb99l, ++ 0x34b0bcb5e19b48a8l, 0x391c0cb3c5c95a63l, 0x4ed8aa4ae3418acbl, ++ 0x5b9cca4f7763e373l, 0x682e6ff3d6b2b8a3l, 0x748f82ee5defb2fcl, ++ 0x78a5636f43172f60l, 0x84c87814a1f0ab72l, 0x8cc702081a6439ecl, ++ 0x90befffa23631e28l, 0xa4506cebde82bde9l, 0xbef9a3f7b2c67915l, ++ 0xc67178f2e372532bl, 0xca273eceea26619cl, 0xd186b8c721c0c207l, ++ 0xeada7dd6cde0eb1el, 0xf57d4f7fee6ed178l, 0x06f067aa72176fbal, ++ 0x0a637dc5a2c898a6l, 0x113f9804bef90dael, 0x1b710b35131c471bl, ++ 0x28db77f523047d84l, 0x32caab7b40c72493l, 0x3c9ebe0a15c9bebcl, ++ 
0x431d67c49c100d4cl, 0x4cc5d4becb3e42b6l, 0x597f299cfc657e2al, ++ 0x5fcb6fab3ad6faecl, 0x6c44198c4a475817l ++ }; ++ const int const_add = vset_sew == Assembler::e32 ? 16 : 32; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(_cgen, "StubRoutines", stub_name(vset_sew, multi_block)); ++ address start = __ pc(); ++ ++ Register buf = c_rarg0; ++ Register state = c_rarg1; ++ Register ofs = c_rarg2; ++ Register limit = c_rarg3; ++ Register consts = t2; // caller saved ++ Register state_c = x28; // caller saved ++ VectorRegister vindex = v2; ++ VectorRegister vW0 = v4; ++ VectorRegister vW1 = v6; ++ VectorRegister vW2 = v8; ++ VectorRegister vW3 = v10; ++ VectorRegister vState0 = v12; ++ VectorRegister vState1 = v14; ++ VectorRegister vHash0 = v16; ++ VectorRegister vHash1 = v18; ++ VectorRegister vTmp0 = v20; ++ VectorRegister vTmp1 = v22; ++ ++ Label multi_block_loop; ++ ++ __ enter(); ++ ++ address constant_table = vset_sew == Assembler::e32 ? (address)round_consts_256 : (address)round_consts_512; ++ la(consts, ExternalAddress(constant_table)); ++ ++ // Register use in this function: ++ // ++ // VECTORS ++ // vW0 - vW3 (512/1024-bits / 4*128/256 bits / 4*4*32/65 bits), hold the message ++ // schedule words (Wt). They start with the message block ++ // content (W0 to W15), then further words in the message ++ // schedule generated via vsha2ms from previous Wt. ++ // Initially: ++ // vW0 = W[ 3:0] = { W3, W2, W1, W0} ++ // vW1 = W[ 7:4] = { W7, W6, W5, W4} ++ // vW2 = W[ 11:8] = {W11, W10, W9, W8} ++ // vW3 = W[15:12] = {W15, W14, W13, W12} ++ // ++ // vState0 - vState1 hold the working state variables (a, b, ..., h) ++ // vState0 = {f[t],e[t],b[t],a[t]} ++ // vState1 = {h[t],g[t],d[t],c[t]} ++ // Initially: ++ // vState0 = {H5i-1, H4i-1, H1i-1 , H0i-1} ++ // vState1 = {H7i-i, H6i-1, H3i-1 , H2i-1} ++ // ++ // v0 = masks for vrgather/vmerge. Single value during the 16 rounds. ++ // ++ // vTmp0 = temporary, Wt+Kt ++ // vTmp1 = temporary, Kt ++ // ++ // vHash0/vHash1 = hold the initial values of the hash, byte-swapped. ++ // ++ // During most of the function the vector state is configured so that each ++ // vector is interpreted as containing four 32/64 bits (e32/e64) elements (128/256 bits). ++ ++ // vsha2ch/vsha2cl uses EGW of 4*SEW. ++ // SHA256 SEW = e32, EGW = 128-bits ++ // SHA512 SEW = e64, EGW = 256-bits ++ // ++ // VLEN is required to be at least 128. ++ // For the case of VLEN=128 and SHA512 we need LMUL=2 to work with 4*e64 (EGW = 256) ++ // ++ // m1: LMUL=1/2 ++ // ta: tail agnostic (don't care about those lanes) ++ // ma: mask agnostic (don't care about those lanes) ++ // x0 is not written, we known the number of vector elements. ++ ++ if (vset_sew == Assembler::e64 && MaxVectorSize == 16) { // SHA512 and VLEN = 128 ++ __ vsetivli(x0, 4, vset_sew, Assembler::m2, Assembler::ma, Assembler::ta); ++ } else { ++ __ vsetivli(x0, 4, vset_sew, Assembler::m1, Assembler::ma, Assembler::ta); ++ } ++ ++ int64_t indexes = vset_sew == Assembler::e32 ? 0x00041014ul : 0x00082028ul; ++ __ li(t0, indexes); ++ __ vmv_v_x(vindex, t0); ++ ++ // Step-over a,b, so we are pointing to c. ++ // const_add is equal to 4x state variable, div by 2 is thus 2, a,b ++ __ addi(state_c, state, const_add/2); ++ ++ // Use index-load to get {f,e,b,a},{h,g,d,c} ++ __ vluxei8_v(vState0, state, vindex); ++ __ vluxei8_v(vState1, state_c, vindex); ++ ++ __ bind(multi_block_loop); ++ ++ // Capture the initial H values in vHash0 and vHash1 to allow for computing ++ // the resulting H', since H' = H+{a',b',c',...,h'}. 
++ __ vmv_v_v(vHash0, vState0); ++ __ vmv_v_v(vHash1, vState1); ++ ++ // Load the 512/1024-bits of the message block in vW0-vW3 and perform ++ // an endian swap on each 4/8 bytes element. ++ // ++ // If Zvkb is not implemented one can use vrgather ++ // with an index sequence to byte-swap. ++ // sequence = [3 2 1 0 7 6 5 4 11 10 9 8 15 14 13 12] ++ // gives us "N ^ 3" as a nice formula to generate ++ // this sequence. 'vid' gives us the N. ++ __ vleXX_v(vset_sew, vW0, buf); ++ __ vrev8_v(vW0, vW0); ++ __ addi(buf, buf, const_add); ++ __ vleXX_v(vset_sew, vW1, buf); ++ __ vrev8_v(vW1, vW1); ++ __ addi(buf, buf, const_add); ++ __ vleXX_v(vset_sew, vW2, buf); ++ __ vrev8_v(vW2, vW2); ++ __ addi(buf, buf, const_add); ++ __ vleXX_v(vset_sew, vW3, buf); ++ __ vrev8_v(vW3, vW3); ++ __ addi(buf, buf, const_add); ++ ++ // Set v0 up for the vmerge that replaces the first word (idx==0) ++ __ vid_v(v0); ++ __ vmseq_vi(v0, v0, 0x0); // v0.mask[i] = (i == 0 ? 1 : 0) ++ ++ VectorRegister rotation_regs[] = {vW0, vW1, vW2, vW3}; ++ int rot_pos = 0; ++ // Quad-round #0 (+0, vW0->vW1->vW2->vW3) ... #11 (+3, vW3->vW0->vW1->vW2) ++ const int qr_end = vset_sew == Assembler::e32 ? 12 : 16; ++ for (int i = 0; i < qr_end; i++) { ++ sha2_quad_round(vset_sew, ++ rotation_regs[(rot_pos + 0) & 0x3], ++ rotation_regs[(rot_pos + 1) & 0x3], ++ rotation_regs[(rot_pos + 2) & 0x3], ++ rotation_regs[(rot_pos + 3) & 0x3], ++ consts, ++ vTmp1, vTmp0, vState0, vState1); ++ ++rot_pos; ++ } ++ // Quad-round #12 (+0, vW0->vW1->vW2->vW3) ... #15 (+3, vW3->vW0->vW1->vW2) ++ // Note that we stop generating new message schedule words (Wt, vW0-13) ++ // as we already generated all the words we end up consuming (i.e., W[63:60]). ++ const int qr_c_end = qr_end + 4; ++ for (int i = qr_end; i < qr_c_end; i++) { ++ sha2_quad_round(vset_sew, ++ rotation_regs[(rot_pos + 0) & 0x3], ++ rotation_regs[(rot_pos + 1) & 0x3], ++ rotation_regs[(rot_pos + 2) & 0x3], ++ rotation_regs[(rot_pos + 3) & 0x3], ++ consts, ++ vTmp1, vTmp0, vState0, vState1, false, i < (qr_c_end-1)); ++ ++rot_pos; ++ } ++ ++ //-------------------------------------------------------------------------------- ++ // Compute the updated hash value H' ++ // H' = H + {h',g',...,b',a'} ++ // = {h,g,...,b,a} + {h',g',...,b',a'} ++ // = {h+h',g+g',...,b+b',a+a'} ++ ++ // H' = H+{a',b',c',...,h'} ++ __ vadd_vv(vState0, vHash0, vState0); ++ __ vadd_vv(vState1, vHash1, vState1); ++ ++ if (multi_block) { ++ int total_adds = vset_sew == Assembler::e32 ? 240 : 608; ++ __ addi(consts, consts, -total_adds); ++ __ add(ofs, ofs, vset_sew == Assembler::e32 ? 64 : 128); ++ __ ble(ofs, limit, multi_block_loop); ++ __ mv(c_rarg0, ofs); // return ofs ++ } ++ ++ // Store H[0..8] = {a,b,c,d,e,f,g,h} from ++ // vState0 = {f,e,b,a} ++ // vState1 = {h,g,d,c} ++ __ vsuxei8_v(vState0, state, vindex); ++ __ vsuxei8_v(vState1, state_c, vindex); ++ ++ __ leave(); ++ __ ret(); ++ ++ return start; ++ } ++ }; ++ ++#undef __ ++#define __ _masm-> ++ + // Set of L registers that correspond to a contiguous memory area. + // Each 64-bit register typically corresponds to 2 32-bit integers. 
+ template +@@ -4339,6 +4618,7 @@ class StubGenerator: public StubCodeGenerator { + return (address) start; + } + ++#endif // COMPILER2_OR_JVMCI + #if INCLUDE_JFR + + static void jfr_prologue(address the_pc, MacroAssembler* _masm, Register thread) { +@@ -4430,6 +4710,115 @@ class StubGenerator: public StubCodeGenerator { + + #endif // INCLUDE_JFR + ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ ++#undef __ ++#define __ masm-> ++ ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ Register arg1 = noreg, ++ Register arg2 = noreg) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. ++ // n.b. 
riscv asserts that frame::arg_reg_save_area_bytes == 0 ++ assert_cond(runtime_entry != nullptr); ++ enum layout { ++ fp_off = 0, ++ fp_off2, ++ return_off, ++ return_off2, ++ framesize // inclusive of return address ++ }; ++ ++ const int insts_size = 1024; ++ const int locs_size = 64; ++ ++ CodeBuffer code(name, insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ assert_cond(oop_maps != nullptr && masm != nullptr); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++ ++ __ enter(); // Save FP and RA before call ++ ++ assert(is_even(framesize / 2), "sp not 16-byte aligned"); ++ ++ // ra and fp are already in place ++ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog ++ ++ int frame_complete = __ pc() - start; ++ ++ // Set up last_Java_sp and last_Java_fp ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ ++ // Call runtime ++ if (arg1 != noreg) { ++ assert(arg2 != c_rarg1, "clobbered"); ++ __ mv(c_rarg1, arg1); ++ } ++ if (arg2 != noreg) { ++ __ mv(c_rarg2, arg2); ++ } ++ __ mv(c_rarg0, xthread); ++ BLOCK_COMMENT("call runtime_entry"); ++ __ call(runtime_entry); ++ ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ assert_cond(map != nullptr); ++ ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ reset_last_Java_frame(true); ++ ++ __ leave(); ++ ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif // ASSERT ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ ++ // codeBlob framesize is in words (not VMRegImpl::slot_size) ++ RuntimeStub* stub = ++ RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ (framesize >> (LogBytesPerWord - LogBytesPerInt)), ++ oop_maps, false); ++ assert(stub != nullptr, "create runtime stub fail!"); ++ return stub->entry_point(); ++ } ++ + #undef __ + + // Initialization +@@ -4550,6 +4939,18 @@ class StubGenerator: public StubCodeGenerator { + } + #endif // COMPILER2 + ++ if (UseSHA256Intrinsics) { ++ Sha2Generator sha2(_masm, this); ++ StubRoutines::_sha256_implCompress = sha2.generate_sha256_implCompress(false); ++ StubRoutines::_sha256_implCompressMB = sha2.generate_sha256_implCompress(true); ++ } ++ ++ if (UseSHA512Intrinsics) { ++ Sha2Generator sha2(_masm, this); ++ StubRoutines::_sha512_implCompress = sha2.generate_sha512_implCompress(false); ++ StubRoutines::_sha512_implCompressMB = sha2.generate_sha512_implCompress(true); ++ } ++ + generate_compare_long_strings(); + + generate_string_indexof_stubs(); +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index a27acb25b..38da4752c 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -121,26 +121,11 @@ void VM_Version::initialize() { + FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); + } + +- if (UseSHA) { +- warning("SHA instructions are not available on this CPU"); +- FLAG_SET_DEFAULT(UseSHA, false); +- } +- + if (UseSHA1Intrinsics) { + warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + } + 
+- if (UseSHA256Intrinsics) { +- warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); +- FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); +- } +- +- if (UseSHA512Intrinsics) { +- warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); +- FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); +- } +- + if (UseSHA3Intrinsics) { + warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); +@@ -224,6 +209,36 @@ void VM_Version::initialize() { + #ifdef COMPILER2 + c2_initialize(); + #endif // COMPILER2 ++ ++ if (UseZvkn && !UseRVV) { ++ FLAG_SET_DEFAULT(UseZvkn, false); ++ warning("Cannot enable Zvkn on cpu without RVV support."); ++ } ++ ++ if (!UseZvkn && UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } else if (UseZvkn && FLAG_IS_DEFAULT(UseSHA)) { ++ FLAG_SET_DEFAULT(UseSHA, true); ++ } ++ ++ if (!UseSHA) { ++ if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU, UseZvkn needed."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ } ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU, UseZvkn needed."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ } else { ++ if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); ++ } ++ if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); ++ } ++ } + } + + #ifdef COMPILER2 +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +index 01c5cf0c6..590585b42 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +@@ -144,6 +144,7 @@ class VM_Version : public Abstract_VM_Version { + decl(ext_Zifencei , "Zifencei" , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT) \ + decl(ext_Zic64b , "Zic64b" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZic64b)) \ + decl(ext_Zihintpause , "Zihintpause" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZihintpause)) \ ++ decl(ext_Zvkn , "Zvkn" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZvkn)) \ + decl(mvendorid , "VendorId" , RV_NO_FLAG_BIT, false, NO_UPDATE_DEFAULT) \ + decl(marchid , "ArchId" , RV_NO_FLAG_BIT, false, NO_UPDATE_DEFAULT) \ + decl(mimpid , "ImpId" , RV_NO_FLAG_BIT, false, NO_UPDATE_DEFAULT) \ diff --git a/Backport-JDK-8322179-RISC-V-Implement-SHA-1-intrinsic.patch b/Backport-JDK-8322179-RISC-V-Implement-SHA-1-intrinsic.patch new file mode 100644 index 0000000000000000000000000000000000000000..3f4c485ebe23b4bd91bb17beda60948762c25eed --- /dev/null +++ b/Backport-JDK-8322179-RISC-V-Implement-SHA-1-intrinsic.patch @@ -0,0 +1,481 @@ +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index dec9a8464..a554729ab 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -4618,6 +4618,344 @@ class StubGenerator: public StubCodeGenerator { + return (address) start; + } + ++ // ------------------------ SHA-1 intrinsic ------------------------ ++ ++ // K't = ++ // 5a827999, 0 <= t <= 19 ++ // 6ed9eba1, 20 <= t <= 39 ++ // 8f1bbcdc, 40 <= t <= 59 ++ // ca62c1d6, 60 <= t <= 79 ++ void sha1_prepare_k(Register cur_k, int round) { ++ assert(round >= 0 && round < 80, 
"must be"); ++ ++ static const int64_t ks[] = {0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6}; ++ if ((round % 20) == 0) { ++ __ mv(cur_k, ks[round/20]); ++ } ++ } ++ ++ // W't = ++ // M't, 0 <= t <= 15 ++ // ROTL'1(W't-3 ^ W't-8 ^ W't-14 ^ W't-16), 16 <= t <= 79 ++ void sha1_prepare_w(Register cur_w, Register ws[], Register buf, int round) { ++ assert(round >= 0 && round < 80, "must be"); ++ ++ if (round < 16) { ++ // in the first 16 rounds, in ws[], every register contains 2 W't, e.g. ++ // in ws[0], high part contains W't-0, low part contains W't-1, ++ // in ws[1], high part contains W't-2, low part contains W't-3, ++ // ... ++ // in ws[7], high part contains W't-14, low part contains W't-15. ++ ++ if ((round % 2) == 0) { ++ __ ld(ws[round/2], Address(buf, (round/2) * 8)); ++ // reverse bytes, as SHA-1 is defined in big-endian. ++ __ revb(ws[round/2], ws[round/2]); ++ __ srli(cur_w, ws[round/2], 32); ++ } else { ++ __ mv(cur_w, ws[round/2]); ++ } ++ ++ return; ++ } ++ ++ if ((round % 2) == 0) { ++ int idx = 16; ++ // W't = ROTL'1(W't-3 ^ W't-8 ^ W't-14 ^ W't-16), 16 <= t <= 79 ++ __ srli(t1, ws[(idx-8)/2], 32); ++ __ xorr(t0, ws[(idx-3)/2], t1); ++ ++ __ srli(t1, ws[(idx-14)/2], 32); ++ __ srli(cur_w, ws[(idx-16)/2], 32); ++ __ xorr(cur_w, cur_w, t1); ++ ++ __ xorr(cur_w, cur_w, t0); ++ __ rolw_imm(cur_w, cur_w, 1, t0); ++ ++ // copy the cur_w value to ws[8]. ++ // now, valid w't values are at: ++ // w0: ws[0]'s lower 32 bits ++ // w1 ~ w14: ws[1] ~ ws[7] ++ // w15: ws[8]'s higher 32 bits ++ __ slli(ws[idx/2], cur_w, 32); ++ ++ return; ++ } ++ ++ int idx = 17; ++ // W't = ROTL'1(W't-3 ^ W't-8 ^ W't-14 ^ W't-16), 16 <= t <= 79 ++ __ srli(t1, ws[(idx-3)/2], 32); ++ __ xorr(t0, t1, ws[(idx-8)/2]); ++ ++ __ xorr(cur_w, ws[(idx-16)/2], ws[(idx-14)/2]); ++ ++ __ xorr(cur_w, cur_w, t0); ++ __ rolw_imm(cur_w, cur_w, 1, t0); ++ ++ // copy the cur_w value to ws[8] ++ __ zero_extend(cur_w, cur_w, 32); ++ __ orr(ws[idx/2], ws[idx/2], cur_w); ++ ++ // shift the w't registers, so they start from ws[0] again. 
++ // now, valid w't values are at: ++ // w0 ~ w15: ws[0] ~ ws[7] ++ Register ws_0 = ws[0]; ++ for (int i = 0; i < 16/2; i++) { ++ ws[i] = ws[i+1]; ++ } ++ ws[8] = ws_0; ++ } ++ ++ // f't(x, y, z) = ++ // Ch(x, y, z) = (x & y) ^ (~x & z) , 0 <= t <= 19 ++ // Parity(x, y, z) = x ^ y ^ z , 20 <= t <= 39 ++ // Maj(x, y, z) = (x & y) ^ (x & z) ^ (y & z) , 40 <= t <= 59 ++ // Parity(x, y, z) = x ^ y ^ z , 60 <= t <= 79 ++ void sha1_f(Register dst, Register x, Register y, Register z, int round) { ++ assert(round >= 0 && round < 80, "must be"); ++ assert_different_registers(dst, x, y, z, t0, t1); ++ ++ if (round < 20) { ++ // (x & y) ^ (~x & z) ++ __ andr(t0, x, y); ++ __ andn(dst, z, x); ++ __ xorr(dst, dst, t0); ++ } else if (round >= 40 && round < 60) { ++ // (x & y) ^ (x & z) ^ (y & z) ++ __ andr(t0, x, y); ++ __ andr(t1, x, z); ++ __ andr(dst, y, z); ++ __ xorr(dst, dst, t0); ++ __ xorr(dst, dst, t1); ++ } else { ++ // x ^ y ^ z ++ __ xorr(dst, x, y); ++ __ xorr(dst, dst, z); ++ } ++ } ++ ++ // T = ROTL'5(a) + f't(b, c, d) + e + K't + W't ++ // e = d ++ // d = c ++ // c = ROTL'30(b) ++ // b = a ++ // a = T ++ void sha1_process_round(Register a, Register b, Register c, Register d, Register e, ++ Register cur_k, Register cur_w, Register tmp, int round) { ++ assert(round >= 0 && round < 80, "must be"); ++ assert_different_registers(a, b, c, d, e, cur_w, cur_k, tmp, t0); ++ ++ // T = ROTL'5(a) + f't(b, c, d) + e + K't + W't ++ ++ // cur_w will be recalculated at the beginning of each round, ++ // so, we can reuse it as a temp register here. ++ Register tmp2 = cur_w; ++ ++ // reuse e as a temporary register, as we will mv new value into it later ++ Register tmp3 = e; ++ __ add(tmp2, cur_k, tmp2); ++ __ add(tmp3, tmp3, tmp2); ++ __ rolw_imm(tmp2, a, 5, t0); ++ ++ sha1_f(tmp, b, c, d, round); ++ ++ __ add(tmp2, tmp2, tmp); ++ __ add(tmp2, tmp2, tmp3); ++ ++ // e = d ++ // d = c ++ // c = ROTL'30(b) ++ // b = a ++ // a = T ++ __ mv(e, d); ++ __ mv(d, c); ++ ++ __ rolw_imm(c, b, 30); ++ __ mv(b, a); ++ __ mv(a, tmp2); ++ } ++ ++ // H(i)0 = a + H(i-1)0 ++ // H(i)1 = b + H(i-1)1 ++ // H(i)2 = c + H(i-1)2 ++ // H(i)3 = d + H(i-1)3 ++ // H(i)4 = e + H(i-1)4 ++ void sha1_calculate_im_hash(Register a, Register b, Register c, Register d, Register e, ++ Register prev_ab, Register prev_cd, Register prev_e) { ++ assert_different_registers(a, b, c, d, e, prev_ab, prev_cd, prev_e); ++ ++ __ add(a, a, prev_ab); ++ __ srli(prev_ab, prev_ab, 32); ++ __ add(b, b, prev_ab); ++ ++ __ add(c, c, prev_cd); ++ __ srli(prev_cd, prev_cd, 32); ++ __ add(d, d, prev_cd); ++ ++ __ add(e, e, prev_e); ++ } ++ ++ void sha1_preserve_prev_abcde(Register a, Register b, Register c, Register d, Register e, ++ Register prev_ab, Register prev_cd, Register prev_e) { ++ assert_different_registers(a, b, c, d, e, prev_ab, prev_cd, prev_e, t0); ++ ++ __ slli(t0, b, 32); ++ __ zero_extend(prev_ab, a, 32); ++ __ orr(prev_ab, prev_ab, t0); ++ ++ __ slli(t0, d, 32); ++ __ zero_extend(prev_cd, c, 32); ++ __ orr(prev_cd, prev_cd, t0); ++ ++ __ mv(prev_e, e); ++ } ++ ++ // Intrinsic for: ++ // void sun.security.provider.SHA.implCompress0(byte[] buf, int ofs) ++ // void sun.security.provider.DigestBase.implCompressMultiBlock0(byte[] b, int ofs, int limit) ++ // ++ // Arguments: ++ // ++ // Inputs: ++ // c_rarg0: byte[] src array + offset ++ // c_rarg1: int[] SHA.state ++ // - - - - - - below are only for implCompressMultiBlock0 - - - - - - ++ // c_rarg2: int offset ++ // c_rarg3: int limit ++ // ++ // Outputs: ++ // - - - - - - below are only for 
implCompressMultiBlock0 - - - - - - ++ // c_rarg0: int offset, when (multi_block == true) ++ // ++ address generate_sha1_implCompress(bool multi_block, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ address start = __ pc(); ++ __ enter(); ++ ++ RegSet saved_regs = RegSet::range(x18, x27); ++ if (multi_block) { ++ // use x9 as src below. ++ saved_regs += RegSet::of(x9); ++ } ++ __ push_reg(saved_regs, sp); ++ ++ // c_rarg0 - c_rarg3: x10 - x13 ++ Register buf = c_rarg0; ++ Register state = c_rarg1; ++ Register offset = c_rarg2; ++ Register limit = c_rarg3; ++ // use src to contain the original start point of the array. ++ Register src = x9; ++ ++ if (multi_block) { ++ __ sub(limit, limit, offset); ++ __ add(limit, limit, buf); ++ __ sub(src, buf, offset); ++ } ++ ++ // [args-reg]: x14 - x17 ++ // [temp-reg]: x28 - x31 ++ // [saved-reg]: x18 - x27 ++ ++ // h0/1/2/3/4 ++ const Register a = x14, b = x15, c = x16, d = x17, e = x28; ++ // w0, w1, ... w15 ++ // put two adjecent w's in one register: ++ // one at high word part, another at low word part ++ // at different round (even or odd), w't value reside in different items in ws[]. ++ // w0 ~ w15, either reside in ++ // ws[0] ~ ws[7], where ++ // w0 at higher 32 bits of ws[0], ++ // w1 at lower 32 bits of ws[0], ++ // ... ++ // w14 at higher 32 bits of ws[7], ++ // w15 at lower 32 bits of ws[7]. ++ // or, reside in ++ // w0: ws[0]'s lower 32 bits ++ // w1 ~ w14: ws[1] ~ ws[7] ++ // w15: ws[8]'s higher 32 bits ++ Register ws[9] = {x29, x30, x31, x18, ++ x19, x20, x21, x22, ++ x23}; // auxiliary register for calculating w's value ++ // current k't's value ++ const Register cur_k = x24; ++ // current w't's value ++ const Register cur_w = x25; ++ // values of a, b, c, d, e in the previous round ++ const Register prev_ab = x26, prev_cd = x27; ++ const Register prev_e = offset; // reuse offset/c_rarg2 ++ ++ // load 5 words state into a, b, c, d, e. ++ // ++ // To minimize the number of memory operations, we apply following ++ // optimization: read the states (a/b/c/d) of 4-byte values in pairs, ++ // with a single ld, and split them into 2 registers. ++ // ++ // And, as the core algorithm of SHA-1 works on 32-bits words, so ++ // in the following code, it does not care about the content of ++ // higher 32-bits in a/b/c/d/e. Based on this observation, ++ // we can apply further optimization, which is to just ignore the ++ // higher 32-bits in a/c/e, rather than set the higher ++ // 32-bits of a/c/e to zero explicitly with extra instructions. ++ __ ld(a, Address(state, 0)); ++ __ srli(b, a, 32); ++ __ ld(c, Address(state, 8)); ++ __ srli(d, c, 32); ++ __ lw(e, Address(state, 16)); ++ ++ Label L_sha1_loop; ++ if (multi_block) { ++ __ BIND(L_sha1_loop); ++ } ++ ++ sha1_preserve_prev_abcde(a, b, c, d, e, prev_ab, prev_cd, prev_e); ++ ++ for (int round = 0; round < 80; round++) { ++ // prepare K't value ++ sha1_prepare_k(cur_k, round); ++ ++ // prepare W't value ++ sha1_prepare_w(cur_w, ws, buf, round); ++ ++ // one round process ++ sha1_process_round(a, b, c, d, e, cur_k, cur_w, t2, round); ++ } ++ ++ // compute the intermediate hash value ++ sha1_calculate_im_hash(a, b, c, d, e, prev_ab, prev_cd, prev_e); ++ ++ if (multi_block) { ++ int64_t block_bytes = 16 * 4; ++ __ addi(buf, buf, block_bytes); ++ ++ __ bge(limit, buf, L_sha1_loop, true); ++ } ++ ++ // store back the state. 
++ __ zero_extend(a, a, 32); ++ __ slli(b, b, 32); ++ __ orr(a, a, b); ++ __ sd(a, Address(state, 0)); ++ __ zero_extend(c, c, 32); ++ __ slli(d, d, 32); ++ __ orr(c, c, d); ++ __ sd(c, Address(state, 8)); ++ __ sw(e, Address(state, 16)); ++ ++ // return offset ++ if (multi_block) { ++ __ sub(c_rarg0, buf, src); ++ } ++ ++ __ pop_reg(saved_regs, sp); ++ ++ __ leave(); ++ __ ret(); ++ ++ return (address) start; ++ } + #endif // COMPILER2_OR_JVMCI + #if INCLUDE_JFR + +@@ -4959,6 +5297,12 @@ class StubGenerator: public StubCodeGenerator { + StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress"); + StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB"); + } ++ ++ if (UseSHA1Intrinsics) { ++ StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress"); ++ StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB"); ++ } ++ + #endif // COMPILER2_OR_JVMCI + } + +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +index fde88f5e3..45e9bc0dc 100644 +--- a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +@@ -39,7 +39,7 @@ enum platform_dependent_constants { + // simply increase sizes if too small (assembler will crash if too small) + _initial_stubs_code_size = 10000, + _continuation_stubs_code_size = 2000, +- _compiler_stubs_code_size = 15000 ZGC_ONLY(+5000), ++ _compiler_stubs_code_size = 25000 ZGC_ONLY(+5000), + _final_stubs_code_size = 20000 ZGC_ONLY(+10000) + }; + +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index 38da4752c..39f41e0bf 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -121,16 +121,6 @@ void VM_Version::initialize() { + FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); + } + +- if (UseSHA1Intrinsics) { +- warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); +- FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); +- } +- +- if (UseSHA3Intrinsics) { +- warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); +- FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); +- } +- + if (UseCRC32Intrinsics) { + warning("CRC32 intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); +@@ -210,34 +200,66 @@ void VM_Version::initialize() { + c2_initialize(); + #endif // COMPILER2 + +- if (UseZvkn && !UseRVV) { +- FLAG_SET_DEFAULT(UseZvkn, false); +- warning("Cannot enable Zvkn on cpu without RVV support."); +- } + +- if (!UseZvkn && UseSHA) { +- warning("SHA instructions are not available on this CPU"); +- FLAG_SET_DEFAULT(UseSHA, false); +- } else if (UseZvkn && FLAG_IS_DEFAULT(UseSHA)) { ++ // SHA's ++ if (FLAG_IS_DEFAULT(UseSHA)) { + FLAG_SET_DEFAULT(UseSHA, true); + } + +- if (!UseSHA) { ++ // SHA-1, no RVV required though. ++ if (UseSHA) { ++ if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); ++ } ++ } else if (UseSHA1Intrinsics) { ++ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ } ++ ++ // UseZvkn (depends on RVV) and SHA-2. ++ if (UseZvkn && !UseRVV) { ++ FLAG_SET_DEFAULT(UseZvkn, false); ++ warning("Cannot enable Zvkn on cpu without RVV support."); ++ } ++ // SHA-2, depends on Zvkn. 
++ if (UseSHA) { ++ if (UseZvkn) { ++ if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); ++ } ++ if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); ++ } ++ } else { ++ if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU, UseZvkn needed."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ } ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU, UseZvkn needed."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ } ++ } else { + if (UseSHA256Intrinsics) { +- warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU, UseZvkn needed."); ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU, as UseSHA disabled."); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + } + if (UseSHA512Intrinsics) { +- warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU, UseZvkn needed."); ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU, as UseSHA disabled."); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } +- } else { +- if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { +- FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); +- } +- if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { +- FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); +- } ++ } ++ ++ // SHA-3 ++ if (UseSHA3Intrinsics) { ++ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); ++ } ++ ++ // UseSHA ++ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA3Intrinsics || UseSHA512Intrinsics)) { ++ FLAG_SET_DEFAULT(UseSHA, false); + } + } + diff --git a/Backport-JDK-8322209-8322179-8329641-RISC-V-Enable-sha-md5-tests.patch b/Backport-JDK-8322209-8322179-8329641-RISC-V-Enable-sha-md5-tests.patch new file mode 100644 index 0000000000000000000000000000000000000000..84e6b279777d6e4179ced1f8975337f388f8e470 --- /dev/null +++ b/Backport-JDK-8322209-8322179-8329641-RISC-V-Enable-sha-md5-tests.patch @@ -0,0 +1,62 @@ +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseMD5IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseMD5IntrinsicsOptionOnUnsupportedCPU.java +index e9ae2f6c1..cd5933ec9 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseMD5IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseMD5IntrinsicsOptionOnUnsupportedCPU.java +@@ -39,6 +39,7 @@ package compiler.intrinsics.sha.cli; + + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; + +@@ -49,6 +50,8 @@ public class TestUseMD5IntrinsicsOptionOnUnsupportedCPU { + DigestOptionsBase.USE_MD5_INTRINSICS_OPTION, /* checkUseSHA = */ false), + new GenericTestCaseForUnsupportedAArch64CPU( + DigestOptionsBase.USE_MD5_INTRINSICS_OPTION, /* checkUseSHA = */ false), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ 
DigestOptionsBase.USE_MD5_INTRINSICS_OPTION, /* checkUseSHA = */ false), + new GenericTestCaseForOtherCPU( + DigestOptionsBase.USE_MD5_INTRINSICS_OPTION, /* checkUseSHA = */ false)).test(); + } +diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +index 689c7c8cc..27fe99892 100644 +--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java ++++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -61,14 +61,16 @@ public class IntrinsicPredicates { + + public static final BooleanSupplier MD5_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", null, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", null, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", null, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", null, null), +- new CPUSpecificPredicate("x86.*", null, null)))); ++ new CPUSpecificPredicate("x86.*", null, null))))); + + public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), +- new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), ++ // SHA-1 intrinsic is implemented with scalar instructions on riscv64 ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", null, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), +@@ -77,7 +79,7 @@ public class IntrinsicPredicates { + + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), +- new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "zvkn" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, null), +@@ -90,7 +92,7 @@ public class IntrinsicPredicates { + + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), +- new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "zvkn" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, null), diff --git a/Backport-JDK-8334999-RISC-V-AES-single-block-cryption-intrinsics.patch b/Backport-JDK-8334999-RISC-V-AES-single-block-cryption-intrinsics.patch new file mode 100644 index 0000000000000000000000000000000000000000..c6281af5e1e0044ff1954c675aef179602400138 --- /dev/null +++ b/Backport-JDK-8334999-RISC-V-AES-single-block-cryption-intrinsics.patch @@ -0,0 +1,271 @@ +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 24e5f4fa8..4f2d2bfb3 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ 
b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -1846,6 +1846,12 @@ enum Nf { + // Vector Bit-manipulation used in Cryptography (Zvkb) Extension + INSN(vbrev8_v, 0b1010111, 0b010, 0b01000, 0b010010); + INSN(vrev8_v, 0b1010111, 0b010, 0b01001, 0b010010); ++ // Vector AES instructions (Zvkned extension) ++ INSN(vaesem_vv, 0b1110111, 0b010, 0b00010, 0b101000); ++ INSN(vaesef_vv, 0b1110111, 0b010, 0b00011, 0b101000); ++ ++ INSN(vaesdm_vv, 0b1110111, 0b010, 0b00000, 0b101000); ++ INSN(vaesdf_vv, 0b1110111, 0b010, 0b00001, 0b101000); + + #undef INSN + +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index a554729ab..dd478edbb 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -2313,6 +2313,173 @@ class StubGenerator: public StubCodeGenerator { + StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); + } + ++ void generate_aes_loadkeys(const Register &key, VectorRegister *working_vregs, int rounds) { ++ const int step = 16; ++ for (int i = 0; i < rounds; i++) { ++ __ vle32_v(working_vregs[i], key); ++ // The keys are stored in little-endian array, while we need ++ // to operate in big-endian. ++ // So performing an endian-swap here with vrev8.v instruction ++ __ vrev8_v(working_vregs[i], working_vregs[i]); ++ __ addi(key, key, step); ++ } ++ } ++ ++ void generate_aes_encrypt(const VectorRegister &res, VectorRegister *working_vregs, int rounds) { ++ assert(rounds <= 15, "rounds should be less than or equal to working_vregs size"); ++ ++ __ vxor_vv(res, res, working_vregs[0]); ++ for (int i = 1; i < rounds - 1; i++) { ++ __ vaesem_vv(res, working_vregs[i]); ++ } ++ __ vaesef_vv(res, working_vregs[rounds - 1]); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // c_rarg0 - source byte array address ++ // c_rarg1 - destination byte array address ++ // c_rarg2 - K (key) in little endian int array ++ // ++ address generate_aescrypt_encryptBlock() { ++ assert(UseAESIntrinsics, "need AES instructions (Zvkned extension) support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); ++ ++ Label L_aes128, L_aes192; ++ ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register key = c_rarg2; // key array address ++ const Register keylen = c_rarg3; ++ ++ VectorRegister working_vregs[] = { ++ v4, v5, v6, v7, v8, v9, v10, v11, ++ v12, v13, v14, v15, v16, v17, v18 ++ }; ++ const VectorRegister res = v19; ++ ++ address start = __ pc(); ++ __ enter(); ++ ++ __ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); ++ ++ __ vsetivli(x0, 4, Assembler::e32, Assembler::m1); ++ __ vle32_v(res, from); ++ ++ __ mv(t2, 52); ++ __ blt(keylen, t2, L_aes128); ++ __ beq(keylen, t2, L_aes192); ++ // Else we fallthrough to the biggest case (256-bit key size) ++ ++ // Note: the following function performs key += 15*16 ++ generate_aes_loadkeys(key, working_vregs, 15); ++ generate_aes_encrypt(res, working_vregs, 15); ++ __ vse32_v(res, to); ++ __ mv(c_rarg0, 0); ++ __ leave(); ++ __ ret(); ++ ++ __ bind(L_aes192); ++ // Note: the following function performs key += 13*16 ++ generate_aes_loadkeys(key, working_vregs, 13); ++ generate_aes_encrypt(res, working_vregs, 13); ++ __ vse32_v(res, to); ++ __ mv(c_rarg0, 0); ++ __ leave(); ++ __ ret(); ++ ++ __ bind(L_aes128); ++ // Note: the following 
function performs key += 11*16 ++ generate_aes_loadkeys(key, working_vregs, 11); ++ generate_aes_encrypt(res, working_vregs, 11); ++ __ vse32_v(res, to); ++ __ mv(c_rarg0, 0); ++ __ leave(); ++ __ ret(); ++ ++ return start; ++ } ++ ++ void generate_aes_decrypt(const VectorRegister &res, VectorRegister *working_vregs, int rounds) { ++ assert(rounds <= 15, "rounds should be less than or equal to working_vregs size"); ++ ++ __ vxor_vv(res, res, working_vregs[rounds - 1]); ++ for (int i = rounds - 2; i > 0; i--) { ++ __ vaesdm_vv(res, working_vregs[i]); ++ } ++ __ vaesdf_vv(res, working_vregs[0]); ++ } ++ ++ // Arguments: ++ // ++ // Inputs: ++ // c_rarg0 - source byte array address ++ // c_rarg1 - destination byte array address ++ // c_rarg2 - K (key) in little endian int array ++ // ++ address generate_aescrypt_decryptBlock() { ++ assert(UseAESIntrinsics, "need AES instructions (Zvkned extension) support"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); ++ ++ Label L_aes128, L_aes192; ++ ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register key = c_rarg2; // key array address ++ const Register keylen = c_rarg3; ++ ++ VectorRegister working_vregs[] = { ++ v4, v5, v6, v7, v8, v9, v10, v11, ++ v12, v13, v14, v15, v16, v17, v18 ++ }; ++ const VectorRegister res = v19; ++ ++ address start = __ pc(); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); ++ ++ __ vsetivli(x0, 4, Assembler::e32, Assembler::m1); ++ __ vle32_v(res, from); ++ ++ __ mv(t2, 52); ++ __ blt(keylen, t2, L_aes128); ++ __ beq(keylen, t2, L_aes192); ++ // Else we fallthrough to the biggest case (256-bit key size) ++ ++ // Note: the following function performs key += 15*16 ++ generate_aes_loadkeys(key, working_vregs, 15); ++ generate_aes_decrypt(res, working_vregs, 15); ++ __ vse32_v(res, to); ++ __ mv(c_rarg0, 0); ++ __ leave(); ++ __ ret(); ++ ++ __ bind(L_aes192); ++ // Note: the following function performs key += 13*16 ++ generate_aes_loadkeys(key, working_vregs, 13); ++ generate_aes_decrypt(res, working_vregs, 13); ++ __ vse32_v(res, to); ++ __ mv(c_rarg0, 0); ++ __ leave(); ++ __ ret(); ++ ++ __ bind(L_aes128); ++ // Note: the following function performs key += 11*16 ++ generate_aes_loadkeys(key, working_vregs, 11); ++ generate_aes_decrypt(res, working_vregs, 11); ++ __ vse32_v(res, to); ++ __ mv(c_rarg0, 0); ++ __ leave(); ++ __ ret(); ++ ++ return start; ++ } + // code for comparing 16 bytes of strings with same encoding + void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) { + const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; +@@ -5271,6 +5438,11 @@ class StubGenerator: public StubCodeGenerator { + StubRoutines::_montgomerySquare = g.generate_square(); + } + ++ if (UseAESIntrinsics) { ++ StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); ++ StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); ++ } ++ + if (UseRVVForBigIntegerShiftIntrinsics) { + StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); + StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index 39f41e0bf..c49072633 100644 
+--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -105,17 +105,6 @@ void VM_Version::initialize() { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); + } + +- if (UseAES || UseAESIntrinsics) { +- if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { +- warning("AES instructions are not available on this CPU"); +- FLAG_SET_DEFAULT(UseAES, false); +- } +- if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { +- warning("AES intrinsics are not available on this CPU"); +- FLAG_SET_DEFAULT(UseAESIntrinsics, false); +- } +- } +- + if (UseAESCTRIntrinsics) { + warning("AES/CTR intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); +@@ -360,6 +349,23 @@ void VM_Version::c2_initialize() { + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); + } ++ ++ // AES ++ if (UseZvkn) { ++ UseAES = UseAES || FLAG_IS_DEFAULT(UseAES); ++ UseAESIntrinsics = ++ UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics)); ++ if (UseAESIntrinsics && !UseAES) { ++ warning("UseAESIntrinsics enabled, but UseAES not, enabling"); ++ UseAES = true; ++ } ++ } else if (UseAESIntrinsics || UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics) || !FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES intrinsics require Zvkn extension (not available on this CPU)."); ++ } ++ FLAG_SET_DEFAULT(UseAES, false); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } + } + #endif // COMPILER2 + +diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp +index 7ca86d094..b24afb297 100644 +--- a/src/hotspot/share/opto/library_call.cpp ++++ b/src/hotspot/share/opto/library_call.cpp +@@ -7007,11 +7007,11 @@ bool LibraryCallKit::inline_counterMode_AESCrypt(vmIntrinsics::ID id) { + + //------------------------------get_key_start_from_aescrypt_object----------------------- + Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) { +-#if defined(PPC64) || defined(S390) ++#if defined(PPC64) || defined(S390) || defined(RISCV64) + // MixColumns for decryption can be reduced by preprocessing MixColumns with round keys. + // Intel's extension is based on this optimization and AESCrypt generates round keys by preprocessing MixColumns. + // However, ppc64 vncipher processes MixColumns and requires the same round keys with encryption. +- // The ppc64 stubs of encryption and decryption use the same round keys (sessionK[0]). ++ // The ppc64 and riscv64 stubs of encryption and decryption use the same round keys (sessionK[0]). + Node* objSessionK = load_field_from_object(aescrypt_object, "sessionK", "[[I"); + assert (objSessionK != nullptr, "wrong version of com.sun.crypto.provider.AESCrypt"); + if (objSessionK == nullptr) { diff --git a/openjdk-21.spec b/openjdk-21.spec index d7e8e1eb3a9a9d1e8f43e6c3cc669a35d647459e..eabf9aafc2209d77289117014924e07fdf41e5e0 100644 --- a/openjdk-21.spec +++ b/openjdk-21.spec @@ -905,7 +905,7 @@ Name: java-21-%{origin} Version: %{newjavaver}.%{buildver} # This package needs `.rolling` as part of Release so as to not conflict on install with # java-X-openjdk. I.e. when latest rolling release is also an LTS release packaged as -Release: 3 +Release: 4 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. 
java-1.5.0-ibm packages @@ -1059,6 +1059,10 @@ Patch3004: Backport-JDK-8348554-Enhance-Linux-kernel-version-ch.patch Patch3005: Backport-JDK-8348384-RISC-V-Disable-auto-enable-Vect.patch Patch3006: Backport-JDK-8352673-RISC-V-Vector-can-t-be-turned-o.patch Patch3007: Backport-JDK-8355878-RISC-V-jdk-incubator-vector-Dou.patch +Patch3008: Backport-JDK-8319716-8327283-RISC-V-Add-SHA-2.patch +Patch3009: Backport-JDK-8322179-RISC-V-Implement-SHA-1-intrinsic.patch +Patch3010: Backport-JDK-8322209-8322179-8329641-RISC-V-Enable-sha-md5-tests.patch +Patch3011: Backport-JDK-8334999-RISC-V-AES-single-block-cryption-intrinsics.patch BuildRequires: autoconf BuildRequires: automake @@ -1361,6 +1365,10 @@ pushd %{top_level_dir_name} %patch3005 -p1 %patch3006 -p1 %patch3007 -p1 +%patch3008 -p1 +%patch3009 -p1 +%patch3010 -p1 +%patch3011 -p1 popd %endif @@ -1918,6 +1926,9 @@ cjc.mainProgram(args) -- the returns from copy_jdk_configs.lua should not affect %changelog +* Fri Oct 10 2025 chenlang - 1:21.0.8.9-4 +- RISC-V add sha1 sha2 and zvkn patches + * Tue Aug 26 2025 songliyang - 1:21.0.8.9-3 - update LoongArch64 port to 21.0.8
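
For context, a minimal standalone sketch of the key-length dispatch used by the new aescrypt stubs in Backport-JDK-8334999: the expanded key that com.sun.crypto.provider.AESCrypt stores in sessionK[0] holds 4 * (rounds + 1) 32-bit words, i.e. 44 for AES-128, 52 for AES-192 and 60 for AES-256, and the stub branches on that length (blt/beq against 52) before loading 11, 13 or 15 128-bit round keys. The helper name round_keys_for below is illustrative only and is not part of the patch.

#include <cassert>

// Maps the expanded-key length in 32-bit words to the number of 128-bit
// round keys loaded by generate_aes_loadkeys: 44 -> 11 (AES-128),
// 52 -> 13 (AES-192), 60 -> 15 (AES-256). The generated stub performs the
// same dispatch with "mv t2, 52; blt keylen, t2, L_aes128; beq keylen, t2, L_aes192".
static int round_keys_for(int keylen_in_ints) {
  assert(keylen_in_ints == 44 || keylen_in_ints == 52 || keylen_in_ints == 60);
  if (keylen_in_ints < 52) {
    return 11;   // AES-128: 10 rounds plus the initial whitening key
  } else if (keylen_in_ints == 52) {
    return 13;   // AES-192: 12 rounds plus the initial whitening key
  }
  return 15;     // AES-256: 14 rounds plus the initial whitening key
}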