diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 56adfd0f509..05ebf0508dc 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -5955,9 +5955,15 @@ void MacroAssembler::remove_frame(int framesize) { void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) { if (needs_stack_repair) { - // Remove the extension of the caller's frame used for inline type unpacking + // The method has a scalarized entry point (where fields of value object arguments + // are passed through registers and stack), and a non-scalarized entry point (where + // value object arguments are given as oops). The non-scalarized entry point will + // first load each field of value object arguments and store them in registers and on + // the stack in a way compatible with the scalarized entry point. To do so, some extra + // stack space might be reserved (if argument registers are not enough). On leaving the + // method, this space must be freed. // - // Right now the stack looks like this: + // In case we used the non-scalarized entry point the stack looks like this: // // | Arguments from caller | // |---------------------------| <-- caller's SP @@ -5983,18 +5989,28 @@ void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair // will fix only this one. Overall, FP/LR #2 are not reliable and are simply // needed to add space between the extension space and the locals, as there // would be between the real arguments and the locals if we don't need to - // do unpacking. + // do unpacking (from the scalarized entry point). // // When restoring, one must then load FP #1 into x29, and LR #1 into x30, // while keeping in mind that from the scalarized entry point, there will be - // only one copy of each. + // only one copy of each. Indeed, in the case we used the scalarized calling + // convention, the stack looks like this: + // + // | Arguments from caller | + // |---------------------------| <-- caller's SP / start of this method's frame + // | Saved LR | + // | Saved FP | + // |---------------------------| <-- FP + // | sp_inc | + // | method locals | + // |---------------------------| <-- SP // // The sp_inc stack slot holds the total size of the frame including the // extension space minus two words for the saved FP and LR. That is how to // find FP/LR #1. This size is expressed in bytes. Be careful when using it // from C++ in pointer arithmetic; you might need to divide it by wordSize. // - // TODO 8371993 store fake values instead of LR/FP#2 + // One can find sp_inc since the start the method's frame is SP + initial_framesize. int sp_inc_offset = initial_framesize - 3 * wordSize; // Immediately below saved LR and FP diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 2f451e59e4f..b1facf1d572 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -49,6 +49,9 @@ #endif // C2 compiled method's prolog code. +// Beware! This sp_inc is NOT the same as the one mentioned in MacroAssembler::remove_frame but only the size +// of the extension space + the additional copy of the return address. That means, it doesn't contain the +// frame size (where the local and sp_inc are) and the saved RBP. void C2_MacroAssembler::verified_entry(Compile* C, int sp_inc) { if (C->clinit_barrier_on_entry()) { assert(VM_Version::supports_fast_class_init_checks(), "sanity"); diff --git a/src/hotspot/cpu/x86/frame_x86.cpp b/src/hotspot/cpu/x86/frame_x86.cpp index 8df4ebcd379..17d7cd36e3d 100644 --- a/src/hotspot/cpu/x86/frame_x86.cpp +++ b/src/hotspot/cpu/x86/frame_x86.cpp @@ -656,6 +656,31 @@ intptr_t* frame::repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) return sender_sp; } + +// See comment in MacroAssembler::remove_frame +frame::CompiledFramePointers frame::compiled_frame_details() const { + // frame owned by optimizing compiler + assert(_cb->frame_size() > 0, "must have non-zero frame size"); + intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); + assert(sender_sp == real_fp(), ""); + + // This is the saved value of EBP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame (or C1?). + // saved_fp_addr should be correct even for a bottom thawed frame (with a return barrier) + intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); + + // Repair the sender sp if the frame has been extended + sender_sp = repair_sender_sp(sender_sp, saved_fp_addr); + + CompiledFramePointers cfp; + cfp.sender_sp = sender_sp; + cfp.saved_fp_addr = saved_fp_addr; + // On Intel the return_address is always the word on the stack + cfp.sender_pc_addr = (address*)(sender_sp - frame::return_addr_offset); + + return cfp; +} + intptr_t* frame::repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr) { assert(nm != nullptr && nm->needs_stack_repair(), ""); // The stack increment resides just below the saved rbp on the stack diff --git a/src/hotspot/cpu/x86/frame_x86.hpp b/src/hotspot/cpu/x86/frame_x86.hpp index 07150a70b3a..272468ccd90 100644 --- a/src/hotspot/cpu/x86/frame_x86.hpp +++ b/src/hotspot/cpu/x86/frame_x86.hpp @@ -141,6 +141,12 @@ public: // Support for scalarized inline type calling convention intptr_t* repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const; + struct CompiledFramePointers { + intptr_t* sender_sp; // The top of the stack of the sender + intptr_t** saved_fp_addr; // Where RBP is saved on the stack + address* sender_pc_addr; // Where return address (copy #1 in remove_frame's comment) is saved on the stack + }; + CompiledFramePointers compiled_frame_details() const; static intptr_t* repair_sender_sp(nmethod* nm, intptr_t* sp, intptr_t** saved_fp_addr); bool was_augmented_on_entry(int& real_size) const; diff --git a/src/hotspot/cpu/x86/frame_x86.inline.hpp b/src/hotspot/cpu/x86/frame_x86.inline.hpp index 30c36b4d945..d275ea2b50b 100644 --- a/src/hotspot/cpu/x86/frame_x86.inline.hpp +++ b/src/hotspot/cpu/x86/frame_x86.inline.hpp @@ -426,36 +426,7 @@ inline frame frame::sender_raw(RegisterMap* map) const { inline frame frame::sender_for_compiled_frame(RegisterMap* map) const { assert(map != nullptr, "map must be set"); - - // frame owned by optimizing compiler - assert(_cb->frame_size() > 0, "must have non-zero frame size"); - intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); - assert(sender_sp == real_fp(), ""); - -#ifdef ASSERT - address sender_pc_copy = (address) *(sender_sp-1); -#endif - - // This is the saved value of EBP which may or may not really be an FP. - // It is only an FP if the sender is an interpreter frame (or C1?). - // saved_fp_addr should be correct even for a bottom thawed frame (with a return barrier) - intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); - - // Repair the sender sp if the frame has been extended - sender_sp = repair_sender_sp(sender_sp, saved_fp_addr); - - // On Intel the return_address is always the word on the stack - address sender_pc = (address) *(sender_sp-1); - -#ifdef ASSERT - if (sender_pc != sender_pc_copy) { - // When extending the stack in the callee method entry to make room for unpacking of value - // type args, we keep a copy of the sender pc at the expected location in the callee frame. - // If the sender pc is patched due to deoptimization, the copy is not consistent anymore. - nmethod* nm = CodeCache::find_blob(sender_pc)->as_nmethod(); - assert(sender_pc == nm->deopt_handler_entry(), "unexpected sender pc"); - } -#endif + CompiledFramePointers cfp = compiled_frame_details(); if (map->update_map()) { // Tell GC to use argument oopmaps for some runtime stubs that need it. @@ -494,21 +465,21 @@ inline frame frame::sender_for_compiled_frame(RegisterMap* map) const { // Since the prolog does the save and restore of EBP there is no oopmap // for it so we must fill in its location as if there was an oopmap entry // since if our caller was compiled code there could be live jvm state in it. - update_map_with_saved_link(map, saved_fp_addr); + update_map_with_saved_link(map, cfp.saved_fp_addr); } - assert(sender_sp != sp(), "must have changed"); + assert(cfp.sender_sp != sp(), "must have changed"); - if (Continuation::is_return_barrier_entry(sender_pc)) { + if (Continuation::is_return_barrier_entry(*cfp.sender_pc_addr)) { if (map->walk_cont()) { // about to walk into an h-stack return Continuation::top_frame(*this, map); } else { - return Continuation::continuation_bottom_sender(map->thread(), *this, sender_sp); + return Continuation::continuation_bottom_sender(map->thread(), *this, cfp.sender_sp); } } - intptr_t* unextended_sp = sender_sp; - return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); + intptr_t* unextended_sp = cfp.sender_sp; + return frame(cfp.sender_sp, unextended_sp, *cfp.saved_fp_addr, *cfp.sender_pc_addr); } template diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 3636597cb0d..0c0c606cb56 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -6065,7 +6065,11 @@ bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState re } // Calculate the extra stack space required for packing or unpacking inline -// args and adjust the stack pointer +// args and adjust the stack pointer. +// +// This extra stack space take into account the copy #2 of the return address, +// but NOT the saved RBP or the normal size of the frame (see MacroAssembler::remove_frame +// for notations). int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) { // Two additional slots to account for return address int sp_inc = (args_on_stack + 2) * VMRegImpl::stack_slot_size; @@ -6076,7 +6080,13 @@ int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) { assert(sp_inc > 0, "sanity"); pop(r13); subptr(rsp, sp_inc); +#ifdef ASSERT + movl(Address(rsp, -VMRegImpl::stack_slot_size), badRegWordVal); + movl(Address(rsp, -2 * VMRegImpl::stack_slot_size), badRegWordVal); + subptr(rsp, 2 * VMRegImpl::stack_slot_size); +#else push(r13); +#endif return sp_inc; } @@ -6311,7 +6321,60 @@ VMReg MacroAssembler::spill_reg_for(VMReg reg) { void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair) { assert((initial_framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); if (needs_stack_repair) { - // TODO 8284443 Add a comment drawing the frame like in Aarch64's version of MacroAssembler::remove_frame + // The method has a scalarized entry point (where fields of value object arguments + // are passed through registers and stack), and a non-scalarized entry point (where + // value object arguments are given as oops). The non-scalarized entry point will + // first load each field of value object arguments and store them in registers and on + // the stack in a way compatible with the scalarized entry point. To do so, some extra + // stack space might be reserved (if argument registers are not enough). On leaving the + // method, this space must be freed. + // + // In case we used the non-scalarized entry point the stack looks like this: + // + // | Arguments from caller | + // |---------------------------| <-- caller's SP + // | Return address #1 | + // |---------------------------| + // | Extension space for | + // | inline arg (un)packing | + // |---------------------------| + // | Return address #2 | + // | Saved RBP | + // |---------------------------| <-- start of this method's frame + // | sp_inc | + // | method locals | + // |---------------------------| <-- SP + // + // There is two copies of the return address on the stack. They will be identical at + // first, but that can change. + // If the caller has been deoptimized, the copy #1 will be patched to point at the + // deopt blob, and the copy #2 will still point into the old method. In short + // the copy #2 is not reliable and should not be used. It is mostly needed to + // add space between the extension space and the locals, as there would be between + // the real arguments and the locals if we don't need to do unpacking (from the + // scalarized entry point). + // + // When leaving, one must use the copy #1 of the return address, while keeping in mind + // that from the scalarized entry point, there will be only one copy. Indeed, in the + // case we used the scalarized calling convention, the stack looks like this: + // + // | Arguments from caller | + // |---------------------------| <-- caller's SP + // | Return address | + // | Saved RBP | + // |---------------------------| <-- start of this method's frame + // | sp_inc | + // | method locals | + // |---------------------------| <-- SP + // + // The sp_inc stack slot holds the total size of the frame, including the extension + // space the possible copy #2 of the return address and the saved RBP (but never the + // copy #1 of the return address). That is how to find the copy #1 of the return address. + // This size is expressed in bytes. Be careful when using it from C++ in pointer arithmetic; + // you might need to divide it by wordSize. + // + // One can find sp_inc since the start the method's frame is SP + initial_framesize. + movq(rbp, Address(rsp, initial_framesize)); // The stack increment resides just below the saved rbp addq(rsp, Address(rsp, initial_framesize - wordSize)); diff --git a/test/jdk/ProblemList.txt b/test/jdk/ProblemList.txt index 3eea2dea1f0..49f1961eb2a 100644 --- a/test/jdk/ProblemList.txt +++ b/test/jdk/ProblemList.txt @@ -840,8 +840,8 @@ java/lang/invoke/VarHandles/VarHandleTestMethodHandleAccessValue.java 8367346 ge jdk/classfile/AccessFlagsTest.java 8366270 generic-all jdk/jfr/event/runtime/TestClassLoaderStatsEvent.java 8366820 generic-all -jdk/internal/vm/Continuation/Fuzz.java#default 8370177 generic-aarch64 -jdk/internal/vm/Continuation/Fuzz.java#preserve-fp 8370177 generic-aarch64 +jdk/internal/vm/Continuation/Fuzz.java#default 8370177 generic-aarch64,generic-x64 +jdk/internal/vm/Continuation/Fuzz.java#preserve-fp 8370177 generic-aarch64,generic-x64 sun/tools/jhsdb/BasicLauncherTest.java 8366806 generic-all sun/tools/jhsdb/HeapDumpTest.java 8366806 generic-all