From 53fffef7b69af7e0721271c7c5922edcaf3edd1d Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 27 Sep 2025 21:18:16 -0600 Subject: [PATCH 01/12] embed.fnc: Add string assertions for isSCRIPTRUN This function is documented to handle empty strings, so EPTRge is appropriate. --- embed.fnc | 4 ++-- proto.h | 2 +- regexec.c | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/embed.fnc b/embed.fnc index 7f10a3cd9261..7db1d26bbb4a 100644 --- a/embed.fnc +++ b/embed.fnc @@ -4282,8 +4282,8 @@ ep |void |Slab_to_rw |NN OPSLAB * const slab # endif #endif /* defined(PERL_CORE) */ #if defined(PERL_CORE) || defined(PERL_EXT) -ERXdp |bool |isSCRIPT_RUN |NN const U8 *s \ - |NN const U8 *send \ +ERXdp |bool |isSCRIPT_RUN |SPTR const U8 *s \ + |EPTRge const U8 *send \ |const bool utf8_target ERTXdip |bool |is_utf8_non_invariant_string \ |NN const U8 * const s \ diff --git a/proto.h b/proto.h index 95eb1aa7d501..de77d935c101 100644 --- a/proto.h +++ b/proto.h @@ -6147,7 +6147,7 @@ PERL_CALLCONV bool Perl_isSCRIPT_RUN(pTHX_ const U8 *s, const U8 *send, const bool utf8_target) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_ISSCRIPT_RUN \ - assert(s); assert(send) + assert(s); assert(send); assert(s <= send) # if defined(PERL_IN_DOOP_C) || defined(PERL_IN_OP_C) || \ defined(PERL_IN_PP_C) || defined(PERL_IN_REGCOMP_ANY) || \ diff --git a/regexec.c b/regexec.c index c854666f1c39..9d4e1557069c 100644 --- a/regexec.c +++ b/regexec.c @@ -11761,6 +11761,8 @@ it are from the Inherited or Common scripts. bool Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target) { + PERL_ARGS_ASSERT_ISSCRIPT_RUN; + /* Basically, it looks at each character in the sequence to see if the * above conditions are met; if not it fails. It uses an inversion map to * find the enum corresponding to the script of each character. 
But this From c89c433c34fd312395fb7fb9d590b2e881689d1a Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sun, 28 Sep 2025 07:39:23 -0600 Subject: [PATCH 02/12] embed.fnc: Add EPTR for pos_b2u_midway This function handles an empty string, and is currently called with one a lot. --- embed.fnc | 2 +- proto.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/embed.fnc b/embed.fnc index 7db1d26bbb4a..e586d18bc61e 100644 --- a/embed.fnc +++ b/embed.fnc @@ -6055,7 +6055,7 @@ S |bool |sv_2iuv_common |NN SV * const sv S |STRLEN |sv_pos_b2u_midway \ |SPTR const U8 * const s \ |MPTR const U8 * const target \ - |NN const U8 *end \ + |EPTRge const U8 *end \ |STRLEN endu S |STRLEN |sv_pos_u2b_cached \ |NN SV * const sv \ diff --git a/proto.h b/proto.h index de77d935c101..d9fc71f63199 100644 --- a/proto.h +++ b/proto.h @@ -9245,7 +9245,8 @@ S_sv_display(pTHX_ SV * const sv, char *tmpbuf, STRLEN tmpbuf_size); STATIC STRLEN S_sv_pos_b2u_midway(pTHX_ const U8 * const s, const U8 * const target, const U8 *end, STRLEN endu); # define PERL_ARGS_ASSERT_SV_POS_B2U_MIDWAY \ - assert(s); assert(target); assert(end); assert(s <= target) + assert(s); assert(target); assert(end); assert(s <= target); \ + assert(target <= end) STATIC STRLEN S_sv_pos_u2b_cached(pTHX_ SV * const sv, MAGIC ** const mgp, const U8 * const start, const U8 * const send, STRLEN uoffset, STRLEN uoffset0, STRLEN boffset0); From da7130a19a4d6131d1c96745a7c0692303df3127 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 27 Sep 2025 21:24:30 -0600 Subject: [PATCH 03/12] embed.fnc: Add string asserts for EPTR for variant_under_utf8_count It can handle an empty string and is called with empty strings --- embed.fnc | 4 ++-- proto.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/embed.fnc b/embed.fnc index e586d18bc61e..0721e3099e08 100644 --- a/embed.fnc +++ b/embed.fnc @@ -4294,8 +4294,8 @@ Ei |STRLEN |sv_or_pv_pos_u2b \ |STRLEN pos \ |NULLOK STRLEN *lenp ERTdi |Size_t 
|variant_under_utf8_count \ - |NN const U8 * const s \ - |NN const U8 * const e + |SPTR const U8 * const s \ + |EPTRge const U8 * const e # if !defined(HAS_MEMRCHR) ETei |void * |my_memrchr |NN const char *s \ |const char c \ diff --git a/proto.h b/proto.h index d9fc71f63199..5f5feba56fd2 100644 --- a/proto.h +++ b/proto.h @@ -10502,7 +10502,7 @@ PERL_STATIC_INLINE Size_t S_variant_under_utf8_count(const U8 * const s, const U8 * const e) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT \ - assert(s); assert(e) + assert(s); assert(e); assert(s <= e) # if !defined(HAS_MEMRCHR) PERL_STATIC_INLINE void * From 0d5ea491907c6cea1259d533e744e7ecb927c5a6 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 27 Sep 2025 21:30:07 -0600 Subject: [PATCH 04/12] embed.fnc: Add string asserts for first_symbol, need_utf8 These can handle empty strings, and are called with them. --- embed.fnc | 8 ++++---- proto.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/embed.fnc b/embed.fnc index 0721e3099e08..dd2f3ffc000e 100644 --- a/embed.fnc +++ b/embed.fnc @@ -5298,8 +5298,8 @@ IR |bool |should_we_output_Debug_r \ #if defined(PERL_IN_PP_PACK_C) S |int |div128 |NN SV *pnum \ |NN bool *done -ST |char |first_symbol |NN const char *pat \ - |NN const char *patend +ST |char |first_symbol |SPTR const char *pat \ + |EPTRge const char *patend RS |const char *|get_num |NN const char *patptr \ |NN SSize_t *lenptr S |const char *|group_end |SPTR const char *patptr \ @@ -5315,8 +5315,8 @@ RST |char * |my_bytes_to_utf8 \ |STRLEN len \ |NN char *dest \ |const bool needs_swap -ST |bool |need_utf8 |NN const char *pat \ - |NN const char *patend +ST |bool |need_utf8 |SPTR const char *pat \ + |EPTRge const char *patend S |bool |next_symbol |NN struct tempsym *symptr S |SV ** |pack_rec |NN SV *cat \ |NN struct tempsym *symptr \ diff --git a/proto.h b/proto.h index 5f5feba56fd2..27c3ee330882 100644 --- a/proto.h +++ b/proto.h @@ -8061,7 +8061,7 
@@ S_div128(pTHX_ SV *pnum, bool *done); STATIC char S_first_symbol(const char *pat, const char *patend); # define PERL_ARGS_ASSERT_FIRST_SYMBOL \ - assert(pat); assert(patend) + assert(pat); assert(patend); assert(pat <= patend) STATIC const char * S_get_num(pTHX_ const char *patptr, SSize_t *lenptr) @@ -8099,7 +8099,7 @@ S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swa STATIC bool S_need_utf8(const char *pat, const char *patend); # define PERL_ARGS_ASSERT_NEED_UTF8 \ - assert(pat); assert(patend) + assert(pat); assert(patend); assert(pat <= patend) STATIC bool S_next_symbol(pTHX_ struct tempsym *symptr); From f0fdb4f5a48f287d22ec70aa3dc3797890f36451 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 16 Dec 2025 10:19:54 -0700 Subject: [PATCH 05/12] embed.fnc: grok_bslash_[ox]: Use EPTRgt These two functions examine their input string without checking if it is zero length. So, the assertion needs to change. They aren't ever called with an empty string. 
--- embed.fnc | 4 ++-- proto.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/embed.fnc b/embed.fnc index dd2f3ffc000e..b1611498f3a8 100644 --- a/embed.fnc +++ b/embed.fnc @@ -4534,7 +4534,7 @@ ERXp |bool |grok_bslash_c |const char source \ |NN const char **message \ |NULLOK U32 *packed_warn ERXp |bool |grok_bslash_o |SPTR char **s \ - |EPTRge const char * const send \ + |EPTRgt const char * const send \ |NN UV *uv \ |NN const char **message \ |NULLOK U32 *packed_warn \ @@ -4542,7 +4542,7 @@ ERXp |bool |grok_bslash_o |SPTR char **s \ |const bool allow_UV_MAX \ |const bool utf8 ERXp |bool |grok_bslash_x |SPTR char **s \ - |EPTRge const char * const send \ + |EPTRgt const char * const send \ |NN UV *uv \ |NN const char **message \ |NULLOK U32 *packed_warn \ diff --git a/proto.h b/proto.h index 27c3ee330882..9bfec16f4d49 100644 --- a/proto.h +++ b/proto.h @@ -6901,11 +6901,11 @@ S_do_trans_simple(pTHX_ SV * const sv, const OPtrans_map * const tbl) # define PERL_ARGS_ASSERT_GROK_BSLASH_O \ assert(s); assert(*s); assert(send); assert(uv); assert(message); \ - assert(*s <= send) + assert(*s < send) # define PERL_ARGS_ASSERT_GROK_BSLASH_X \ assert(s); assert(*s); assert(send); assert(uv); assert(message); \ - assert(*s <= send) + assert(*s < send) #endif /* defined(PERL_IN_DQUOTE_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) */ From 59b8d50f94c0b6caef49f759fbdc35abf304155e Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 16 Dec 2025 10:23:20 -0700 Subject: [PATCH 06/12] embed.fnc: Add string assertions for grok_numeric_radix This function takes a string with a beginning and ending pointer. It doesn't dereference if the string is empty, and returns the correct value when empty, and does get called with empty strings. 
--- embed.fnc | 4 ++-- proto.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/embed.fnc b/embed.fnc index b1611498f3a8..0e8cafbc394e 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1427,8 +1427,8 @@ Adp |int |grok_number_flags \ |NULLOK UV *valuep \ |U32 flags ARdp |bool |grok_numeric_radix \ - |NN const char **sp \ - |NN const char *send + |SPTR const char **sp \ + |EPTRge const char *send AMdp |UV |grok_oct |NN const char *start \ |NN STRLEN *len_p \ |NN I32 *flags \ diff --git a/proto.h b/proto.h index 9bfec16f4d49..cb75c3f47ab3 100644 --- a/proto.h +++ b/proto.h @@ -1285,7 +1285,7 @@ PERL_CALLCONV bool Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX \ - assert(sp); assert(send) + assert(sp); assert(*sp); assert(send); assert(*sp <= send) PERL_CALLCONV UV Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result); From e01fe6f264ce387fd8d6b46023ef8893ead8587c Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 16 Dec 2025 10:27:33 -0700 Subject: [PATCH 07/12] embed.fnc: Add string assertions for utf8_hop_forward... These functions take a string argument with beginning and ending positions. They handle the case of an empty string properly, and the documentation says they handle empty strings. 
--- embed.fnc | 8 ++++---- proto.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/embed.fnc b/embed.fnc index 0e8cafbc394e..4baa828b049d 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3823,13 +3823,13 @@ ARTdip |U8 * |utf8_hop_back_overshoot \ |SPTR const U8 * const start \ |NULLOK SSize_t *remaining ARTdmp |U8 * |utf8_hop_forward \ - |NN const U8 *s \ + |SPTR const U8 *s \ |SSize_t off \ - |NN const U8 * const end + |EPTRge const U8 * const end ARTdip |U8 * |utf8_hop_forward_overshoot \ - |NN const U8 *s \ + |SPTR const U8 *s \ |SSize_t off \ - |NN const U8 * const end \ + |EPTRge const U8 * const end \ |NULLOK SSize_t *remaining ARTdip |U8 * |utf8_hop_overshoot \ |MPTR const U8 *s \ diff --git a/proto.h b/proto.h index cb75c3f47ab3..fcca93639df6 100644 --- a/proto.h +++ b/proto.h @@ -10303,7 +10303,7 @@ PERL_STATIC_INLINE U8 * Perl_utf8_hop_forward_overshoot(const U8 *s, SSize_t off, const U8 * const end, SSize_t *remaining) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_UTF8_HOP_FORWARD_OVERSHOOT \ - assert(s); assert(end) + assert(s); assert(end); assert(s <= end) PERL_STATIC_INLINE U8 * Perl_utf8_hop_overshoot(const U8 *s, SSize_t off, const U8 * const start, const U8 * const end, SSize_t *remaining) From 34639f89606e0ba87101d40d2ab63d47ff24962b Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 16 Dec 2025 10:31:31 -0700 Subject: [PATCH 08/12] pp_pack.c: Add missing 'S_' to function names Calls in this file to these functions bypassed the macros, with no harm currently done. But it isn't good practice. 
--- pp_pack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pp_pack.c b/pp_pack.c index f210984f4e37..78e5c003aa86 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -804,7 +804,7 @@ S_next_symbol(pTHX_ tempsym_t* symptr ) themselves if they need to do a lot of unpacks like this on it */ STATIC bool -need_utf8(const char *pat, const char *patend) +S_need_utf8(const char *pat, const char *patend) { bool first = TRUE; @@ -824,7 +824,7 @@ need_utf8(const char *pat, const char *patend) } STATIC char -first_symbol(const char *pat, const char *patend) { +S_first_symbol(const char *pat, const char *patend) { PERL_ARGS_ASSERT_FIRST_SYMBOL; while (pat < patend) { From 608762f85580aa593211457efe54e590597821a5 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 16 Dec 2025 10:46:17 -0700 Subject: [PATCH 09/12] embed.fnc: Change EPTR assert for regcurly to gt This internal function can handle empty strings, but it isn't ever called with one so far, and it is better practice to not call it with an empty string --- embed.fnc | 2 +- proto.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/embed.fnc b/embed.fnc index 4baa828b049d..8aa3b1283fc2 100644 --- a/embed.fnc +++ b/embed.fnc @@ -5699,7 +5699,7 @@ ETXp |UV |to_fold_latin1_|const U8 c \ #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) ERTXp |bool |regcurly |SPTR const char *s \ - |EPTRge const char *e \ + |EPTRgt const char *e \ |NULLOK const char *result[5] #endif #if defined(PERL_IN_REGCOMP_DEBUG_C) && defined(DEBUGGING) diff --git a/proto.h b/proto.h index fcca93639df6..c71995b5d590 100644 --- a/proto.h +++ b/proto.h @@ -8665,7 +8665,7 @@ Perl_populate_invlist_from_bitmap(pTHX_ const U8 *bitmap, const Size_t bitmap_le #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) # define PERL_ARGS_ASSERT_REGCURLY \ - assert(s); assert(e); assert(s <= e) + assert(s); assert(e); assert(s < e) # if defined(PERL_CORE) || defined(PERL_EXT) PERL_CALLCONV bool From 
f699261603dad280a3927fd0c7be71ed8de5f50e Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 16 Dec 2025 10:50:29 -0700 Subject: [PATCH 10/12] embed.fnc: Change EPTR get_quantifier_value assert to gt This internal function looks problematic with regard to handling empty strings, but it isn't ever called with one so far. Change to catch such calls that might get added in the future. --- embed.fnc | 2 +- proto.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/embed.fnc b/embed.fnc index 8aa3b1283fc2..459740178227 100644 --- a/embed.fnc +++ b/embed.fnc @@ -5511,7 +5511,7 @@ ETi |Size_t |find_first_differing_byte_pos \ ES |U32 |get_quantifier_value \ |NN RExC_state_t *pRExC_state \ |SPTR const char *start \ - |EPTRge const char *end + |EPTRgt const char *end ES |bool |grok_bslash_N |NN RExC_state_t *pRExC_state \ |NULLOK regnode_offset *nodep \ |NULLOK UV *code_point_p \ diff --git a/proto.h b/proto.h index c71995b5d590..b0224d7a6a54 100644 --- a/proto.h +++ b/proto.h @@ -8409,7 +8409,7 @@ Perl_invlist_clone(pTHX_ SV * const invlist, SV *newlist); assert(screamer); assert(strbeg <= stringarg) # define PERL_ARGS_ASSERT_GET_QUANTIFIER_VALUE \ - assert(pRExC_state); assert(start); assert(end); assert(start <= end) + assert(pRExC_state); assert(start); assert(end); assert(start < end) # define PERL_ARGS_ASSERT_GROK_BSLASH_N \ assert(pRExC_state); assert(flagp) From dcc0e253a1374cf06d73dced7090e712c7b8b4a2 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 16 Dec 2025 10:56:35 -0700 Subject: [PATCH 11/12] embed.fnc: Add string assertions for debug_start_match This internal function takes a string argument with beginning and ending positions. It handles the case of an empty string properly. 
--- embed.fnc | 4 ++-- proto.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/embed.fnc b/embed.fnc index 459740178227..064cc1a59ea0 100644 --- a/embed.fnc +++ b/embed.fnc @@ -5919,8 +5919,8 @@ EWi |void |unwind_paren |NN regexp *rex \ ES |void |debug_start_match \ |NN const REGEXP *prog \ |const bool do_utf8 \ - |NN const char *start \ - |NN const char *end \ + |SPTR const char *start \ + |EPTRge const char *end \ |NN const char *blurb ES |void |dump_exec_pos |NN const char *locinput \ |NN const regnode *scan \ diff --git a/proto.h b/proto.h index b0224d7a6a54..bbd0c2f235ce 100644 --- a/proto.h +++ b/proto.h @@ -8933,7 +8933,8 @@ S_unwind_scan_frames(pTHX_ void *p); # if defined(DEBUGGING) # define PERL_ARGS_ASSERT_DEBUG_START_MATCH \ - assert(prog); assert(start); assert(end); assert(blurb) + assert(prog); assert(start); assert(end); assert(blurb); \ + assert(start <= end) # define PERL_ARGS_ASSERT_DUMP_EXEC_POS \ assert(locinput); assert(scan); assert(loc_regeol); assert(loc_bostr); \ From 35da435eff2c7d603e50e8d34f076caaf0259173 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 16 Dec 2025 10:59:14 -0700 Subject: [PATCH 12/12] embed.fnc: Add string assertions for dump_exec_pos This internal function takes a string argument with beginning and ending positions. 
It is called all the time with an empty string. --- embed.fnc | 4 ++-- proto.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/embed.fnc b/embed.fnc index 064cc1a59ea0..742deae2522e 100644 --- a/embed.fnc +++ b/embed.fnc @@ -5922,9 +5922,9 @@ ES |void |debug_start_match \ |SPTR const char *start \ |EPTRge const char *end \ |NN const char *blurb -ES |void |dump_exec_pos |NN const char *locinput \ +ES |void |dump_exec_pos |SPTR const char *locinput \ |NN const regnode *scan \ - |NN const char *loc_regeol \ + |EPTRge const char *loc_regeol \ |NN const char *loc_bostr \ |NN const char *loc_reg_starttry \ |const bool do_utf8 \ diff --git a/proto.h b/proto.h index bbd0c2f235ce..8b58f80e3f02 100644 --- a/proto.h +++ b/proto.h @@ -8938,7 +8938,7 @@ S_unwind_scan_frames(pTHX_ void *p); # define PERL_ARGS_ASSERT_DUMP_EXEC_POS \ assert(locinput); assert(scan); assert(loc_regeol); assert(loc_bostr); \ - assert(loc_reg_starttry) + assert(loc_reg_starttry); assert(locinput <= loc_regeol) # define PERL_ARGS_ASSERT_RE_EXEC_INDENTF \ assert(fmt)