From e0c7f682c2c2144c22343055a84c96c0ee16acde Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 8 Jan 2025 14:38:59 +0000 Subject: [PATCH 01/14] Add WriteBarrier to dictobject This implements the addreference and remove reference logic for the dictionary. --- Include/internal/pycore_regions.h | 13 +++--- Lib/test/test_regions_dictobject.py | 50 +++++++++++++++++++++ Objects/dictobject.c | 67 +++++++++++++++++++++++++---- Objects/object.c | 4 +- Objects/regions.c | 55 ++++++++++++++++++----- 5 files changed, 162 insertions(+), 27 deletions(-) create mode 100644 Lib/test/test_regions_dictobject.py diff --git a/Include/internal/pycore_regions.h b/Include/internal/pycore_regions.h index 6b363abd49423d..9c4a245e235df8 100644 --- a/Include/internal/pycore_regions.h +++ b/Include/internal/pycore_regions.h @@ -57,15 +57,18 @@ PyObject* _Py_ResetInvariant(void); #define Py_ResetInvariant() _Py_ResetInvariant() // Invariant placeholder -bool _Pyrona_AddReference(PyObject* target, PyObject* new_ref); -#define Pyrona_ADDREFERENCE(a, b) _Pyrona_AddReference(a, b) -#define Pyrona_REMOVEREFERENCE(a, b) // TODO +bool _Py_RegionAddReference(PyObject* src, PyObject* new_tgt); +#define Py_REGIONADDREFERENCE(a, b) _Py_RegionAddReference(a, b) + // Helper macros to count the number of arguments #define _COUNT_ARGS(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, N, ...) N #define COUNT_ARGS(...) _COUNT_ARGS(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) -bool _Pyrona_AddReferences(PyObject* target, int new_refc, ...); -#define Pyrona_ADDREFERENCES(a, ...) _Pyrona_AddReferences(a, COUNT_ARGS(__VA_ARGS__), __VA_ARGS__) +bool _Py_RegionAddReferences(PyObject* src, int new_tgtc, ...); +#define Py_REGIONADDREFERENCES(a, ...) _Py_RegionAddReferences(a, COUNT_ARGS(__VA_ARGS__), __VA_ARGS__) + +void _Py_RegionRemoveReference(PyObject* src, PyObject* old_tgt); +#define Py_REGIONREMOVEREFERENCE(a, b) _Py_RegionRemoveReference(a, b) #ifdef NDEBUG #define _Py_VPYDBG(fmt, ...) diff --git a/Lib/test/test_regions_dictobject.py b/Lib/test/test_regions_dictobject.py new file mode 100644 index 00000000000000..425a5442c46170 --- /dev/null +++ b/Lib/test/test_regions_dictobject.py @@ -0,0 +1,50 @@ +import unittest + +class TestRegionsDictObject(unittest.TestCase): + def setUp(self): + enableinvariant() + + def test_dict_insert_empty_dict(self): + # Create Region with Empty dictionary + r = Region() + d = {} + r.add_object(d) + n = {} + # Add local object to region + d["foo"] = n + self.assertTrue(r.owns_object(n)) + + def test_dict_insert_nonempty_dict(self): + # Create Region with Nonempty dictionary + r = Region() + d = {} + d["bar"] = 1 + r.add_object(d) + # Add local object to region + n = {} + d["foo"] = n + self.assertTrue(r.owns_object(n)) + + def test_dict_update_dict(self): + # Create Region with Nonempty dictionary + r = Region() + d = {} + n1 = {} + d["foo"] = n1 + r.add_object(d) + # Update dictionary to contain a local object + n2 = {} + d["foo"] = n2 + self.assertTrue(r.owns_object(n2)) + + def test_dict_clear(self): + # Create Region with Nonempty dictionary + r = Region() + d = {} + n = {} + d["foo"] = n + r.add_object(d) + # Clear dictionary + d.clear() + # As LRC is not checked by the invariant, this test cannot + # check anything useful yet. diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c1084ca1a3ca89..6ad0e352a9ab18 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -122,6 +122,7 @@ As a consequence of this, split keys have a maximum size of 16. #include "pycore_regions.h" // _PyObject_GC_TRACK() #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_regions.h" // Py_ADDREGIONREFERENCE(), ... (region) #include "stringlib/eq.h" // unicode_eq() #include "regions.h" // Py_IsImmutable() @@ -666,6 +667,10 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) static void free_keys_object(PyInterpreterState *interp, PyDictKeysObject *keys) { + // TODO: This feels like it should remove the references in the regions + // but keys is not a Python object, so it's not clear how to do that. + // mjp: Leaving as a TODO for now. + assert(keys != Py_EMPTY_KEYS); if (DK_IS_UNICODE(keys)) { PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); @@ -830,6 +835,9 @@ clone_combined_dict_keys(PyDictObject *orig) if (value != NULL) { Py_INCREF(value); Py_INCREF(*pkey); + // TODO need to add_reference here. We don't know the underlying region + // but it should be freshly allocated, so can we consider it the local region + // TODO discuss before merging PR. } pvalue += offs; pkey += offs; @@ -1258,6 +1266,9 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, MAINTAIN_TRACKING(mp, key, value); if (ix == DKIX_EMPTY) { + if (!Py_REGIONADDREFERENCES((PyObject*)mp, key, value)) { + goto Fail; + } uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, value); /* Insert into new slot. */ @@ -1303,6 +1314,9 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, } if (old_value != value) { + if (!Py_REGIONADDREFERENCE((PyObject*)mp, value)) { + goto Fail; + } if(DK_IS_UNICODE(mp->ma_keys)){ PyDictUnicodeEntry *ep; ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; @@ -1336,6 +1350,7 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, else { _PyDictEntry_SetValue(DK_ENTRIES(mp->ma_keys) + ix, value); } + Py_REGIONREMOVEREFERENCE((PyObject*)mp, old_value); } mp->ma_version_tag = new_version; } @@ -1365,6 +1380,9 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp, return -1; } + if (!Py_REGIONADDREFERENCES((PyObject*)mp, key, value)) + return -1; + uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, value); @@ -2012,6 +2030,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix, if (mp->ma_values) { assert(old_value == mp->ma_values->values[ix]); mp->ma_values->values[ix] = NULL; + Py_REGIONREMOVEREFERENCE(mp, old_value); assert(ix < SHARED_KEYS_MAX_SIZE); /* Update order */ delete_index_from_values(mp->ma_values, ix); @@ -2181,7 +2200,11 @@ PyDict_Clear(PyObject *op) if (oldvalues != NULL) { n = oldkeys->dk_nentries; for (i = 0; i < n; i++) + { + PyObject* old = oldvalues->values[i]; Py_CLEAR(oldvalues->values[i]); + Py_REGIONREMOVEREFERENCE(op, old); + } free_values(oldvalues); dictkeys_decref(interp, oldkeys); } @@ -2459,7 +2482,9 @@ dict_dealloc(PyDictObject *mp) Py_TRASHCAN_BEGIN(mp, dict_dealloc) if (values != NULL) { for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) { + PyObject *value = values->values[i]; Py_XDECREF(values->values[i]); + Py_REGIONREMOVEREFERENCE(mp, value); } free_values(values); dictkeys_decref(interp, keys); @@ -3142,6 +3167,13 @@ PyDict_Copy(PyObject *o) dictkeys_incref(mp->ma_keys); for (size_t i = 0; i < size; i++) { PyObject *value = mp->ma_values->values[i]; + if (!Py_REGIONADDREFERENCE(split_copy, value)) + { + // TODO: is this safe to dealloc the split_copy? + // is it in a valid enough state to be deallocated? + Py_DECREF(split_copy); + return NULL; + } split_copy->ma_values->values[i] = Py_XNewRef(value); } if (_PyObject_GC_IS_TRACKED(mp)) @@ -3446,6 +3478,8 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) Py_ssize_t index = (int)mp->ma_keys->dk_nentries; assert(index < SHARED_KEYS_MAX_SIZE); assert(mp->ma_values->values[index] == NULL); + if (!Py_REGIONADDREFERENCE(mp, value)) + return NULL; mp->ma_values->values[index] = Py_NewRef(value); _PyDictValues_AddToInsertionOrder(mp->ma_values, index); } @@ -3467,6 +3501,8 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) assert(mp->ma_keys->dk_usable >= 0); } else if (value == NULL) { + if (!Py_REGIONADDREFERENCE(mp, value)) + return NULL; uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, defaultobj); value = defaultobj; @@ -5507,7 +5543,7 @@ _PyObject_InitializeDict(PyObject *obj) return -1; } PyObject **dictptr = _PyObject_ComputedDictPointer(obj); - Pyrona_ADDREFERENCE(obj, dict); + Py_REGIONADDREFERENCE(obj, dict); *dictptr = dict; return 0; } @@ -5553,11 +5589,20 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, assert(values != NULL); assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT); - if(!Py_CHECKWRITE(obj)){ + if (!Py_CHECKWRITE(obj)){ PyErr_WriteToImmutable(obj); return -1; } + //TODO: PYRONA: The addition of the key is complex here. + // The keys PyDictKeysObject, might already have the key. Note that + // the keys PyDictKeysObject is not a PyObject. So it is unclear where + // this edge is created. + // The keys is coming from ht_cached_keys on the type object. + // This is also interesting from a race condition perspective. + // Can this be shared, should it be treated immutably when the type is? + // mjp: Leaving for a future PR. + Py_ssize_t ix = DKIX_EMPTY; if (PyUnicode_CheckExact(name)) { ix = insert_into_dictkeys(keys, name); @@ -5589,9 +5634,13 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, return PyDict_SetItem(dict, name, value); } } + if (!Py_REGIONADDREFERENCE(obj, value)) { + return -1; + } PyObject *old_value = values->values[ix]; values->values[ix] = Py_XNewRef(value); if (old_value == NULL) { + Py_REGIONREMOVEREFERENCE(obj, old_value); if (value == NULL) { PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%U'", @@ -5775,7 +5824,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context) _Py_SetImmutable(dict); } else { - Pyrona_ADDREFERENCE(obj, dict); + Py_REGIONADDREFERENCE(obj, dict); dorv_ptr->dict = dict; } } @@ -5789,7 +5838,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context) _Py_SetImmutable(dict); } else { - Pyrona_ADDREFERENCE(obj, dict); + Py_REGIONADDREFERENCE(obj, dict); dorv_ptr->dict = dict; } } @@ -5817,7 +5866,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context) _Py_SetImmutable(dict); } else { - Pyrona_ADDREFERENCE(obj, dict); + Py_REGIONADDREFERENCE(obj, dict); *dictptr = dict; } } @@ -5841,7 +5890,7 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, if (dict == NULL) { dictkeys_incref(cached); dict = new_dict_with_shared_keys(interp, cached); - Pyrona_ADDREFERENCE(owner, dict); + Py_REGIONADDREFERENCE(owner, dict); if (dict == NULL) return -1; *dictptr = dict; @@ -5850,7 +5899,7 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, res = PyDict_DelItem(dict, key); } else { - if (Pyrona_ADDREFERENCES(dict, key, value)) { + if (Py_REGIONADDREFERENCES(dict, key, value)) { res = PyDict_SetItem(dict, key, value); } else { // Error is set inside ADDREFERENCE @@ -5863,14 +5912,14 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, dict = PyDict_New(); if (dict == NULL) return -1; - Pyrona_ADDREFERENCE(owner, dict); + Py_REGIONADDREFERENCE(owner, dict); *dictptr = dict; } if (value == NULL) { res = PyDict_DelItem(dict, key); } else { // TODO: remove this once we merge Matt P's changeset to dictionary object - if (Pyrona_ADDREFERENCES(dict, key, value)) { + if (Py_REGIONADDREFERENCES(dict, key, value)) { res = PyDict_SetItem(dict, key, value); } else { // Error is set inside ADDREFERENCE diff --git a/Objects/object.c b/Objects/object.c index b9ce043f6e4140..a9813a999fdb62 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1267,7 +1267,7 @@ _PyObject_GetDictPtr(PyObject *obj) PyErr_Clear(); return NULL; } - Pyrona_ADDREFERENCE(obj, dict); + Py_REGIONADDREFERENCE(obj, dict); dorv_ptr->dict = dict; } return &dorv_ptr->dict; @@ -1467,7 +1467,7 @@ _PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, res = NULL; goto done; } - Pyrona_ADDREFERENCE(obj, dict); + Py_REGIONADDREFERENCE(obj, dict); dorv_ptr->dict = dict; } } diff --git a/Objects/regions.c b/Objects/regions.c index b1451e59f6647e..956cee17fd27b6 100644 --- a/Objects/regions.c +++ b/Objects/regions.c @@ -44,7 +44,8 @@ static regionmetadata* regionmetadata_get_parent(regionmetadata* self); static PyObject *PyRegion_add_object(PyRegionObject *self, PyObject *args); static PyObject *PyRegion_remove_object(PyRegionObject *self, PyObject *args); static const char *get_region_name(PyObject* obj); -static void _PyErr_Region(PyObject *tgt, PyObject *new_ref, const char *msg); +static void _PyErr_Region(PyObject *src, PyObject *tgt, const char *msg); +#define Py_REGION_DATA(ob) (_Py_CAST(regionmetadata*, Py_REGION(ob))) /** * Global status for performing the region check. @@ -1886,14 +1887,14 @@ PyTypeObject PyRegion_Type = { PyType_GenericNew, /* tp_new */ }; -void _PyErr_Region(PyObject *tgt, PyObject *new_ref, const char *msg) { - const char *new_ref_region_name = get_region_name(new_ref); +void _PyErr_Region(PyObject *src, PyObject *tgt, const char *msg) { const char *tgt_region_name = get_region_name(tgt); + const char *src_region_name = get_region_name(src); + PyObject *src_type_repr = PyObject_Repr(PyObject_Type(src)); + const char *src_desc = src_type_repr ? PyUnicode_AsUTF8(src_type_repr) : "<>"; PyObject *tgt_type_repr = PyObject_Repr(PyObject_Type(tgt)); const char *tgt_desc = tgt_type_repr ? PyUnicode_AsUTF8(tgt_type_repr) : "<>"; - PyObject *new_ref_type_repr = PyObject_Repr(PyObject_Type(new_ref)); - const char *new_ref_desc = new_ref_type_repr ? PyUnicode_AsUTF8(new_ref_type_repr) : "<>"; - PyErr_Format(PyExc_RuntimeError, "Error: Invalid edge %p (%s in %s) -> %p (%s in %s) %s\n", tgt, tgt_desc, tgt_region_name, new_ref, new_ref_desc, new_ref_region_name, msg); + PyErr_Format(PyExc_RuntimeError, "Error: Invalid edge %p (%s in %s) -> %p (%s in %s) %s\n", src, src_desc, src_region_name, tgt, tgt_desc, tgt_region_name, msg); } static const char *get_region_name(PyObject* obj) { @@ -1912,7 +1913,7 @@ static const char *get_region_name(PyObject* obj) { } // x.f = y ==> _Pyrona_AddReference(src=x, tgt=y) -bool _Pyrona_AddReference(PyObject *src, PyObject *tgt) { +bool _Py_RegionAddReference(PyObject *src, PyObject *tgt) { if (Py_REGION(src) == Py_REGION(tgt)) { // Nothing to do -- intra-region references are always permitted return true; @@ -1935,12 +1936,12 @@ bool _Pyrona_AddReference(PyObject *src, PyObject *tgt) { } // Convenience function for moving multiple references into tgt at once -bool _Pyrona_AddReferences(PyObject *tgt, int new_refc, ...) { +bool _Py_RegionAddReferences(PyObject *src, int tgtc, ...) { va_list args; - va_start(args, new_refc); + va_start(args, tgtc); - for (int i = 0; i < new_refc; i++) { - int res = _Pyrona_AddReference(tgt, va_arg(args, PyObject*)); + for (int i = 0; i < tgtc; i++) { + int res = _Py_RegionAddReference(src, va_arg(args, PyObject*)); if (!res) return false; } @@ -1954,3 +1955,35 @@ void _PyRegion_set_cown_parent(PyObject* bridge, PyObject* cown) { Py_XINCREF(cown); Py_XSETREF(data->cown, cown); } + +void _Py_RegionRemoveReference(PyObject *src, PyObject *tgt) { + if (Py_REGION(src) == Py_REGION(tgt)) { + // Nothing to do -- intra-region references have no accounting. + return; + } + + // If the target is local, then so must the source be. So this should + // be covered by the previous check. + assert(!_Py_IsLocal(tgt)); + + if (_Py_IsImmutable(tgt) || _Py_IsCown(tgt)) { + // Nothing to do -- removing a ref to an immutable or a cown has no additional accounting. + return; + } + + if (_Py_IsLocal(src)) { + // Dec LRC of the previously referenced region + regionmetadata_dec_lrc(Py_REGION_DATA(tgt)); + return; + } + + // This must be a parent reference, so we need to remove the parent reference. + regionmetadata* tgt_md = Py_REGION_DATA(tgt); + regionmetadata* src_md = Py_REGION_DATA(src); + if (tgt_md->parent == src_md) { + regionmetadata_unparent(tgt_md); + } else { + // TODO: Could `dirty` mean this isn't an error? + _PyErr_Region(src, tgt, "(in WB/remove_ref)"); + } +} From e8356826e37391c12f3989ad1719789be97f9fb6 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 8 Jan 2025 15:25:50 +0000 Subject: [PATCH 02/14] Fix merge --- Objects/regions.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/regions.c b/Objects/regions.c index 956cee17fd27b6..badbb931185a20 100644 --- a/Objects/regions.c +++ b/Objects/regions.c @@ -1981,7 +1981,8 @@ void _Py_RegionRemoveReference(PyObject *src, PyObject *tgt) { regionmetadata* tgt_md = Py_REGION_DATA(tgt); regionmetadata* src_md = Py_REGION_DATA(src); if (tgt_md->parent == src_md) { - regionmetadata_unparent(tgt_md); + // Unparent the region. + regionmetadata_set_parent(tgt_md, _Py_LOCAL_REGION); } else { // TODO: Could `dirty` mean this isn't an error? _PyErr_Region(src, tgt, "(in WB/remove_ref)"); From a49c15b111871b358352936d95e5803cde4d3831 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 8 Jan 2025 16:34:26 +0000 Subject: [PATCH 03/14] Fix add referece to modiy LRC --- Objects/regions.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/regions.c b/Objects/regions.c index badbb931185a20..1e5b1c8e5fbfcb 100644 --- a/Objects/regions.c +++ b/Objects/regions.c @@ -1925,8 +1925,9 @@ bool _Py_RegionAddReference(PyObject *src, PyObject *tgt) { } if (_Py_IsLocal(src)) { - // Slurp emphemerally owned object into the region of the target object + // Record the borrowed reference in the LRC of the target region // _Py_VPYDBG("Added borrowed ref %p --> %p (owner: '%s')\n", tgt, new_ref, get_region_name(tgt)); + Py_REGION_DATA(tgt)->lrc += 1; return true; } From 22225c03c492b99c5cec2fdfa514052003b99df9 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 8 Jan 2025 16:36:33 +0000 Subject: [PATCH 04/14] Fix merge --- Objects/regions.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Objects/regions.c b/Objects/regions.c index 1e5b1c8e5fbfcb..2982b3cdec89ba 100644 --- a/Objects/regions.c +++ b/Objects/regions.c @@ -45,7 +45,6 @@ static PyObject *PyRegion_add_object(PyRegionObject *self, PyObject *args); static PyObject *PyRegion_remove_object(PyRegionObject *self, PyObject *args); static const char *get_region_name(PyObject* obj); static void _PyErr_Region(PyObject *src, PyObject *tgt, const char *msg); -#define Py_REGION_DATA(ob) (_Py_CAST(regionmetadata*, Py_REGION(ob))) /** * Global status for performing the region check. @@ -1972,20 +1971,22 @@ void _Py_RegionRemoveReference(PyObject *src, PyObject *tgt) { return; } + regionmetadata* tgt_md = Py_REGION_DATA(tgt); if (_Py_IsLocal(src)) { // Dec LRC of the previously referenced region - regionmetadata_dec_lrc(Py_REGION_DATA(tgt)); + // TODO should this decrement be a function, if it hits zero, + // then a region could become unreachable. + tgt_md->lrc -= 1; return; } // This must be a parent reference, so we need to remove the parent reference. - regionmetadata* tgt_md = Py_REGION_DATA(tgt); regionmetadata* src_md = Py_REGION_DATA(src); - if (tgt_md->parent == src_md) { - // Unparent the region. - regionmetadata_set_parent(tgt_md, _Py_LOCAL_REGION); - } else { + if (Py_region_ptr(tgt_md->parent) != src_md) { // TODO: Could `dirty` mean this isn't an error? _PyErr_Region(src, tgt, "(in WB/remove_ref)"); } + + // Unparent the region. + regionmetadata_set_parent(tgt_md, _Py_LOCAL_REGION); } From 398e6d467640a75724af2f2b58f2bb475c724ef6 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 9 Jan 2025 09:22:00 +0000 Subject: [PATCH 05/14] Fix UAF --- Objects/dictobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 6ad0e352a9ab18..c8b9bf35a435bc 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2483,8 +2483,8 @@ dict_dealloc(PyDictObject *mp) if (values != NULL) { for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) { PyObject *value = values->values[i]; - Py_XDECREF(values->values[i]); Py_REGIONREMOVEREFERENCE(mp, value); + Py_XDECREF(values->values[i]); } free_values(values); dictkeys_decref(interp, keys); From 69eb8bdd7640ac734cdac5ae747564552f9e48f5 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 9 Jan 2025 09:32:31 +0000 Subject: [PATCH 06/14] Fixing some UAF potential scenarios. --- Objects/dictobject.c | 8 +++++--- Objects/regions.c | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c8b9bf35a435bc..790f06decc8f82 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2030,10 +2030,10 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix, if (mp->ma_values) { assert(old_value == mp->ma_values->values[ix]); mp->ma_values->values[ix] = NULL; - Py_REGIONREMOVEREFERENCE(mp, old_value); assert(ix < SHARED_KEYS_MAX_SIZE); /* Update order */ delete_index_from_values(mp->ma_values, ix); + Py_REGIONREMOVEREFERENCE(mp, old_value); ASSERT_CONSISTENT(mp); } else { @@ -2202,8 +2202,10 @@ PyDict_Clear(PyObject *op) for (i = 0; i < n; i++) { PyObject* old = oldvalues->values[i]; - Py_CLEAR(oldvalues->values[i]); + // TODO Should we build a combined macro for this + // Py_CLEAR_WITH_REGION(op, oldvalues->values[i]) Py_REGIONREMOVEREFERENCE(op, old); + Py_CLEAR(oldvalues->values[i]); } free_values(oldvalues); dictkeys_decref(interp, oldkeys); @@ -5640,7 +5642,6 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, PyObject *old_value = values->values[ix]; values->values[ix] = Py_XNewRef(value); if (old_value == NULL) { - Py_REGIONREMOVEREFERENCE(obj, old_value); if (value == NULL) { PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%U'", @@ -5653,6 +5654,7 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, if (value == NULL) { delete_index_from_values(values, ix); } + Py_REGIONREMOVEREFERENCE(obj, old_value); Py_DECREF(old_value); } return 0; diff --git a/Objects/regions.c b/Objects/regions.c index 2982b3cdec89ba..e1e60b4d8bd778 100644 --- a/Objects/regions.c +++ b/Objects/regions.c @@ -1988,5 +1988,5 @@ void _Py_RegionRemoveReference(PyObject *src, PyObject *tgt) { } // Unparent the region. - regionmetadata_set_parent(tgt_md, _Py_LOCAL_REGION); + regionmetadata_set_parent(tgt_md, NULL); } From 956ee53df7c7d966cc8cfda33325604cfe54c98a Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 9 Jan 2025 09:54:18 +0000 Subject: [PATCH 07/14] Warnings. --- Include/internal/pycore_regions.h | 6 +++--- Objects/regions.c | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_regions.h b/Include/internal/pycore_regions.h index 9c4a245e235df8..1102f896d77e1f 100644 --- a/Include/internal/pycore_regions.h +++ b/Include/internal/pycore_regions.h @@ -58,17 +58,17 @@ PyObject* _Py_ResetInvariant(void); // Invariant placeholder bool _Py_RegionAddReference(PyObject* src, PyObject* new_tgt); -#define Py_REGIONADDREFERENCE(a, b) _Py_RegionAddReference(a, b) +#define Py_REGIONADDREFERENCE(a, b) _Py_RegionAddReference(_PyObject_CAST(a), b) // Helper macros to count the number of arguments #define _COUNT_ARGS(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, N, ...) N #define COUNT_ARGS(...) _COUNT_ARGS(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) bool _Py_RegionAddReferences(PyObject* src, int new_tgtc, ...); -#define Py_REGIONADDREFERENCES(a, ...) _Py_RegionAddReferences(a, COUNT_ARGS(__VA_ARGS__), __VA_ARGS__) +#define Py_REGIONADDREFERENCES(a, ...) _Py_RegionAddReferences(_PyObject_CAST(a), COUNT_ARGS(__VA_ARGS__), __VA_ARGS__) void _Py_RegionRemoveReference(PyObject* src, PyObject* old_tgt); -#define Py_REGIONREMOVEREFERENCE(a, b) _Py_RegionRemoveReference(a, b) +#define Py_REGIONREMOVEREFERENCE(a, b) _Py_RegionRemoveReference(_PyObject_CAST(a), b) #ifdef NDEBUG #define _Py_VPYDBG(fmt, ...) diff --git a/Objects/regions.c b/Objects/regions.c index e1e60b4d8bd778..ea578e8c5fc812 100644 --- a/Objects/regions.c +++ b/Objects/regions.c @@ -1982,7 +1982,8 @@ void _Py_RegionRemoveReference(PyObject *src, PyObject *tgt) { // This must be a parent reference, so we need to remove the parent reference. regionmetadata* src_md = Py_REGION_DATA(src); - if (Py_region_ptr(tgt_md->parent) != src_md) { + regionmetadata* tgt_parent_md = REGION_DATA_CAST(Py_region_ptr(tgt_md->parent)); + if (tgt_parent_md != src_md) { // TODO: Could `dirty` mean this isn't an error? _PyErr_Region(src, tgt, "(in WB/remove_ref)"); } From ff38aa0968df7567d52863e068d5f1422408068c Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Thu, 9 Jan 2025 16:33:29 +0000 Subject: [PATCH 08/14] Fix merge --- Objects/dictobject.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 790f06decc8f82..273c4be22ec1e5 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -5920,13 +5920,7 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, if (value == NULL) { res = PyDict_DelItem(dict, key); } else { - // TODO: remove this once we merge Matt P's changeset to dictionary object - if (Py_REGIONADDREFERENCES(dict, key, value)) { - res = PyDict_SetItem(dict, key, value); - } else { - // Error is set inside ADDREFERENCE - return -1; - } + res = PyDict_SetItem(dict, key, value); } } ASSERT_CONSISTENT(dict); From d8b2390c63c37d35f9db4901929e3eb2c53647b1 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 10 Jan 2025 13:53:27 +0000 Subject: [PATCH 09/14] Py_CLEAR This adds a Py_CLEAR like thing to remove the reference in the correct point. --- Include/internal/pycore_regions.h | 8 ++++++++ Objects/dictobject.c | 8 +------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Include/internal/pycore_regions.h b/Include/internal/pycore_regions.h index 1102f896d77e1f..43982752bc908f 100644 --- a/Include/internal/pycore_regions.h +++ b/Include/internal/pycore_regions.h @@ -86,6 +86,14 @@ int _PyRegion_is_closed(PyObject* region); int _PyCown_release(PyObject *self); int _PyCown_is_released(PyObject *self); + +#define Py_CLEAR_OBJECT_FIELD(obj, field) do { \ + PyObject *tmp = (PyObject *)(field); \ + (field) = NULL; \ + Py_REGIONREMOVEREFERENCE(obj, tmp); \ + Py_XDECREF(tmp); \ +} while (0) + #ifdef __cplusplus } #endif diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 273c4be22ec1e5..7c37de1211c691 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2200,13 +2200,7 @@ PyDict_Clear(PyObject *op) if (oldvalues != NULL) { n = oldkeys->dk_nentries; for (i = 0; i < n; i++) - { - PyObject* old = oldvalues->values[i]; - // TODO Should we build a combined macro for this - // Py_CLEAR_WITH_REGION(op, oldvalues->values[i]) - Py_REGIONREMOVEREFERENCE(op, old); - Py_CLEAR(oldvalues->values[i]); - } + Py_CLEAR_OBJECT_FIELD(op, oldvalues->values[i]); free_values(oldvalues); dictkeys_decref(interp, oldkeys); } From b1ef1f96fb4da766a5bea497d40db7a0e01ccc5d Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Tue, 14 Jan 2025 09:05:46 +0000 Subject: [PATCH 10/14] Fixes --- Lib/test/test_regions_dictobject.py | 38 ++++++++++++++++++++++++++--- Objects/dictobject.c | 8 +++--- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_regions_dictobject.py b/Lib/test/test_regions_dictobject.py index 425a5442c46170..40ba4da24773ea 100644 --- a/Lib/test/test_regions_dictobject.py +++ b/Lib/test/test_regions_dictobject.py @@ -8,7 +8,7 @@ def test_dict_insert_empty_dict(self): # Create Region with Empty dictionary r = Region() d = {} - r.add_object(d) + r.body = d n = {} # Add local object to region d["foo"] = n @@ -19,7 +19,7 @@ def test_dict_insert_nonempty_dict(self): r = Region() d = {} d["bar"] = 1 - r.add_object(d) + r.body = d # Add local object to region n = {} d["foo"] = n @@ -31,7 +31,7 @@ def test_dict_update_dict(self): d = {} n1 = {} d["foo"] = n1 - r.add_object(d) + r.body = d # Update dictionary to contain a local object n2 = {} d["foo"] = n2 @@ -43,8 +43,38 @@ def test_dict_clear(self): d = {} n = {} d["foo"] = n - r.add_object(d) + r.body = d # Clear dictionary d.clear() # As LRC is not checked by the invariant, this test cannot # check anything useful yet. + + def test_dict_copy(self): + r = Region() + d = {} + r.body = d + r2 = Region() + d["foo"] = r2 + d.copy() + + def test_dict_setdefault(self): + r = Region("outer") + d = {} + r.body = d + r2 = Region("inner") + d["foo"] = r2 + d.setdefault("foo", r2) + self.assertRaises(RegionError, d.setdefault, "bar", r2) + + def test_dict_update(self): + # Create a region containing two dictionaries + r = Region() + d = {} + r.body = d + d2 = {} + r.body2 = d2 + # Add a contained region to the first dictionary + d["reg"] = Region() + # Update the second dictionary to contain the elements of the first + self.assertRaises(RegionError, d2.update, d) + self.assertRaises(RegionError, d2.update, d) \ No newline at end of file diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 7c37de1211c691..be391750e8db7a 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2960,7 +2960,7 @@ dict_merge(PyInterpreterState *interp, PyObject *a, PyObject *b, int override) USABLE_FRACTION(DK_SIZE(okeys)/2) < other->ma_used)) { uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_CLONED, mp, b, NULL); - PyDictKeysObject *keys = clone_combined_dict_keys(other); + PyDictKeysObject *keys = clone_combined_dict_keys(other); // Need to say what owns the keys? if (keys == NULL) { return -1; } @@ -3455,6 +3455,8 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) return NULL; if (ix == DKIX_EMPTY) { + if (!Py_REGIONADDREFERENCE(mp, defaultobj)) + return NULL; uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, defaultobj); mp->ma_keys->dk_version = 0; @@ -3474,8 +3476,6 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) Py_ssize_t index = (int)mp->ma_keys->dk_nentries; assert(index < SHARED_KEYS_MAX_SIZE); assert(mp->ma_values->values[index] == NULL); - if (!Py_REGIONADDREFERENCE(mp, value)) - return NULL; mp->ma_values->values[index] = Py_NewRef(value); _PyDictValues_AddToInsertionOrder(mp->ma_values, index); } @@ -3497,7 +3497,7 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) assert(mp->ma_keys->dk_usable >= 0); } else if (value == NULL) { - if (!Py_REGIONADDREFERENCE(mp, value)) + if (!Py_REGIONADDREFERENCE(mp, defaultobj)) return NULL; uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, defaultobj); From 708f3681dae49602c25d627a8292b552bb15b0e9 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 15 Jan 2025 17:39:27 +0000 Subject: [PATCH 11/14] Handle dict.update(...) correctly --- Include/internal/pycore_regions.h | 3 +++ Objects/dictobject.c | 21 +++++++++++++++------ Objects/regions.c | 12 ++++++++++++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_regions.h b/Include/internal/pycore_regions.h index 43982752bc908f..1637c55ecf0caf 100644 --- a/Include/internal/pycore_regions.h +++ b/Include/internal/pycore_regions.h @@ -60,6 +60,9 @@ PyObject* _Py_ResetInvariant(void); bool _Py_RegionAddReference(PyObject* src, PyObject* new_tgt); #define Py_REGIONADDREFERENCE(a, b) _Py_RegionAddReference(_PyObject_CAST(a), b) +void _Py_RegionAddLocalReference(PyObject* new_tgt); +#define Py_REGIONADDLOCALREFERENCE(b) _Py_RegionAddLocalReference(b) + // Helper macros to count the number of arguments #define _COUNT_ARGS(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, N, ...) N #define COUNT_ARGS(...) _COUNT_ARGS(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index be391750e8db7a..5556795b1d919b 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -793,8 +793,12 @@ new_dict_with_shared_keys(PyInterpreterState *interp, PyDictKeysObject *keys) } +/* The target represents the dictionary that this object will become part of. + If target is NULL, the object is not part of a freshly allocated dictionary, so should + be considered as part of te local region. +*/ static PyDictKeysObject * -clone_combined_dict_keys(PyDictObject *orig) +clone_combined_dict_keys(PyDictObject *orig, PyObject* target) { assert(PyDict_Check(orig)); assert(Py_TYPE(orig)->tp_iter == (getiterfunc)dict_iter); @@ -835,9 +839,14 @@ clone_combined_dict_keys(PyDictObject *orig) if (value != NULL) { Py_INCREF(value); Py_INCREF(*pkey); - // TODO need to add_reference here. We don't know the underlying region - // but it should be freshly allocated, so can we consider it the local region - // TODO discuss before merging PR. + if (target != NULL) { + if (!Py_REGIONADDREFERENCES(target, *pkey, value)) + return NULL; + } + else { + Py_REGIONADDLOCALREFERENCE(*pkey); + Py_REGIONADDLOCALREFERENCE(value); + } } pvalue += offs; pkey += offs; @@ -2960,7 +2969,7 @@ dict_merge(PyInterpreterState *interp, PyObject *a, PyObject *b, int override) USABLE_FRACTION(DK_SIZE(okeys)/2) < other->ma_used)) { uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_CLONED, mp, b, NULL); - PyDictKeysObject *keys = clone_combined_dict_keys(other); // Need to say what owns the keys? + PyDictKeysObject *keys = clone_combined_dict_keys(other, a); // Need to say what owns the keys? if (keys == NULL) { return -1; } @@ -3195,7 +3204,7 @@ PyDict_Copy(PyObject *o) operations and copied after that. In cases like this, we defer to PyDict_Merge, which produces a compacted copy. */ - PyDictKeysObject *keys = clone_combined_dict_keys(mp); + PyDictKeysObject *keys = clone_combined_dict_keys(mp, NULL); if (keys == NULL) { return NULL; } diff --git a/Objects/regions.c b/Objects/regions.c index ea578e8c5fc812..7f138973965bd3 100644 --- a/Objects/regions.c +++ b/Objects/regions.c @@ -1935,6 +1935,18 @@ bool _Py_RegionAddReference(PyObject *src, PyObject *tgt) { return add_to_region(tgt, Py_REGION(src)) == Py_None; } +// Used to add a reference from a local object that might not have been created yet +// to tgt. +void _Py_RegionAddLocalReference(PyObject *tgt) { + // Only need to increment the LRC of the target region + // if it is not local, immutable, or a cown. + if (_Py_IsLocal(tgt) || Py_IsImmutable(tgt) || _Py_IsCown(tgt)) { + return; + } + + Py_REGION_DATA(tgt)->lrc += 1; +} + // Convenience function for moving multiple references into tgt at once bool _Py_RegionAddReferences(PyObject *src, int tgtc, ...) { va_list args; From 3aebba0f030f9f837de2f5b5f6bc2d1df71a2cf5 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 15 Jan 2025 17:46:17 +0000 Subject: [PATCH 12/14] Make Py_CLEAR_OBJECT_FIELD same as Py_CLEAR with extra remove --- Include/internal/pycore_regions.h | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_regions.h b/Include/internal/pycore_regions.h index 1637c55ecf0caf..72a839b4dbc8e0 100644 --- a/Include/internal/pycore_regions.h +++ b/Include/internal/pycore_regions.h @@ -90,12 +90,30 @@ int _PyCown_release(PyObject *self); int _PyCown_is_released(PyObject *self); -#define Py_CLEAR_OBJECT_FIELD(obj, field) do { \ - PyObject *tmp = (PyObject *)(field); \ - (field) = NULL; \ - Py_REGIONREMOVEREFERENCE(obj, tmp); \ - Py_XDECREF(tmp); \ -} while (0) +#ifdef _Py_TYPEOF +#define Py_CLEAR_OBJECT_FIELD(op, field) \ + do { \ + _Py_TYPEOF(op)* _tmp_op_ptr = &(op); \ + _Py_TYPEOF(op) _tmp_old_op = (*_tmp_op_ptr); \ + if (_tmp_old_op != NULL) { \ + *_tmp_op_ptr = _Py_NULL; \ + Py_REGIONREMOVEREFERENCE(op, _tmp_old_op); \ + Py_DECREF(_tmp_old_op); \ + } \ + } while (0) +#else +#define Py_CLEAR_OBJECT_FIELD(op, field) \ + do { \ + PyObject **_tmp_op_ptr = _Py_CAST(PyObject**, &(op)); \ + PyObject *_tmp_old_op = (*_tmp_op_ptr); \ + if (_tmp_old_op != NULL) { \ + PyObject *_null_ptr = _Py_NULL; \ + memcpy(_tmp_op_ptr, &_null_ptr, sizeof(PyObject*)); \ + Py_REGIONREMOVEREFERENCE(op, _tmp_old_op); \ + Py_DECREF(_tmp_old_op); \ + } \ + } while (0) +#endif #ifdef __cplusplus } From 8ddf4861090b5e3a5b4a56bba38f13b9959e23d4 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Wed, 15 Jan 2025 17:48:04 +0000 Subject: [PATCH 13/14] Format --- Lib/test/test_regions_dictobject.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_regions_dictobject.py b/Lib/test/test_regions_dictobject.py index 40ba4da24773ea..d2d4164b34a670 100644 --- a/Lib/test/test_regions_dictobject.py +++ b/Lib/test/test_regions_dictobject.py @@ -77,4 +77,4 @@ def test_dict_update(self): d["reg"] = Region() # Update the second dictionary to contain the elements of the first self.assertRaises(RegionError, d2.update, d) - self.assertRaises(RegionError, d2.update, d) \ No newline at end of file + self.assertRaises(RegionError, d2.update, d) From ea27948e41c59fa3d3eb06e3f8bfe7a8479d60c4 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 17 Jan 2025 10:44:30 +0000 Subject: [PATCH 14/14] Fix PyClear variant. --- Include/internal/pycore_regions.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Include/internal/pycore_regions.h b/Include/internal/pycore_regions.h index 72a839b4dbc8e0..aeb06bf517afc8 100644 --- a/Include/internal/pycore_regions.h +++ b/Include/internal/pycore_regions.h @@ -93,24 +93,24 @@ int _PyCown_is_released(PyObject *self); #ifdef _Py_TYPEOF #define Py_CLEAR_OBJECT_FIELD(op, field) \ do { \ - _Py_TYPEOF(op)* _tmp_op_ptr = &(op); \ - _Py_TYPEOF(op) _tmp_old_op = (*_tmp_op_ptr); \ - if (_tmp_old_op != NULL) { \ - *_tmp_op_ptr = _Py_NULL; \ - Py_REGIONREMOVEREFERENCE(op, _tmp_old_op); \ - Py_DECREF(_tmp_old_op); \ + _Py_TYPEOF(op)* _tmp_field_ptr = &(field); \ + _Py_TYPEOF(op) _tmp_old_field = (*_tmp_field_ptr); \ + if (_tmp_old_field != NULL) { \ + *_tmp_field_ptr = _Py_NULL; \ + Py_REGIONREMOVEREFERENCE(op, _tmp_old_field); \ + Py_DECREF(_tmp_old_field); \ } \ } while (0) #else #define Py_CLEAR_OBJECT_FIELD(op, field) \ do { \ - PyObject **_tmp_op_ptr = _Py_CAST(PyObject**, &(op)); \ - PyObject *_tmp_old_op = (*_tmp_op_ptr); \ - if (_tmp_old_op != NULL) { \ + PyObject **_tmp_field_ptr = _Py_CAST(PyObject**, &(op)); \ + PyObject *_tmp_old_field = (*_tmp_field_ptr); \ + if (_tmp_old_field != NULL) { \ PyObject *_null_ptr = _Py_NULL; \ - memcpy(_tmp_op_ptr, &_null_ptr, sizeof(PyObject*)); \ - Py_REGIONREMOVEREFERENCE(op, _tmp_old_op); \ - Py_DECREF(_tmp_old_op); \ + memcpy(_tmp_field_ptr, &_null_ptr, sizeof(PyObject*)); \ + Py_REGIONREMOVEREFERENCE(op, _tmp_old_field); \ + Py_DECREF(_tmp_old_field); \ } \ } while (0) #endif