diff --git a/Include/internal/pycore_regions.h b/Include/internal/pycore_regions.h index 6b363abd49423d..aeb06bf517afc8 100644 --- a/Include/internal/pycore_regions.h +++ b/Include/internal/pycore_regions.h @@ -57,15 +57,21 @@ PyObject* _Py_ResetInvariant(void); #define Py_ResetInvariant() _Py_ResetInvariant() // Invariant placeholder -bool _Pyrona_AddReference(PyObject* target, PyObject* new_ref); -#define Pyrona_ADDREFERENCE(a, b) _Pyrona_AddReference(a, b) -#define Pyrona_REMOVEREFERENCE(a, b) // TODO +bool _Py_RegionAddReference(PyObject* src, PyObject* new_tgt); +#define Py_REGIONADDREFERENCE(a, b) _Py_RegionAddReference(_PyObject_CAST(a), b) + +void _Py_RegionAddLocalReference(PyObject* new_tgt); +#define Py_REGIONADDLOCALREFERENCE(b) _Py_RegionAddLocalReference(b) + // Helper macros to count the number of arguments #define _COUNT_ARGS(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, N, ...) N #define COUNT_ARGS(...) _COUNT_ARGS(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) -bool _Pyrona_AddReferences(PyObject* target, int new_refc, ...); -#define Pyrona_ADDREFERENCES(a, ...) _Pyrona_AddReferences(a, COUNT_ARGS(__VA_ARGS__), __VA_ARGS__) +bool _Py_RegionAddReferences(PyObject* src, int new_tgtc, ...); +#define Py_REGIONADDREFERENCES(a, ...) _Py_RegionAddReferences(_PyObject_CAST(a), COUNT_ARGS(__VA_ARGS__), __VA_ARGS__) + +void _Py_RegionRemoveReference(PyObject* src, PyObject* old_tgt); +#define Py_REGIONREMOVEREFERENCE(a, b) _Py_RegionRemoveReference(_PyObject_CAST(a), b) #ifdef NDEBUG #define _Py_VPYDBG(fmt, ...) @@ -83,6 +89,32 @@ int _PyRegion_is_closed(PyObject* region); int _PyCown_release(PyObject *self); int _PyCown_is_released(PyObject *self); + +#ifdef _Py_TYPEOF +#define Py_CLEAR_OBJECT_FIELD(op, field) \ + do { \ + _Py_TYPEOF(op)* _tmp_field_ptr = &(field); \ + _Py_TYPEOF(op) _tmp_old_field = (*_tmp_field_ptr); \ + if (_tmp_old_field != NULL) { \ + *_tmp_field_ptr = _Py_NULL; \ + Py_REGIONREMOVEREFERENCE(op, _tmp_old_field); \ + Py_DECREF(_tmp_old_field); \ + } \ + } while (0) +#else +#define Py_CLEAR_OBJECT_FIELD(op, field) \ + do { \ + PyObject **_tmp_field_ptr = _Py_CAST(PyObject**, &(op)); \ + PyObject *_tmp_old_field = (*_tmp_field_ptr); \ + if (_tmp_old_field != NULL) { \ + PyObject *_null_ptr = _Py_NULL; \ + memcpy(_tmp_field_ptr, &_null_ptr, sizeof(PyObject*)); \ + Py_REGIONREMOVEREFERENCE(op, _tmp_old_field); \ + Py_DECREF(_tmp_old_field); \ + } \ + } while (0) +#endif + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_regions_dictobject.py b/Lib/test/test_regions_dictobject.py new file mode 100644 index 00000000000000..d2d4164b34a670 --- /dev/null +++ b/Lib/test/test_regions_dictobject.py @@ -0,0 +1,80 @@ +import unittest + +class TestRegionsDictObject(unittest.TestCase): + def setUp(self): + enableinvariant() + + def test_dict_insert_empty_dict(self): + # Create Region with Empty dictionary + r = Region() + d = {} + r.body = d + n = {} + # Add local object to region + d["foo"] = n + self.assertTrue(r.owns_object(n)) + + def test_dict_insert_nonempty_dict(self): + # Create Region with Nonempty dictionary + r = Region() + d = {} + d["bar"] = 1 + r.body = d + # Add local object to region + n = {} + d["foo"] = n + self.assertTrue(r.owns_object(n)) + + def test_dict_update_dict(self): + # Create Region with Nonempty dictionary + r = Region() + d = {} + n1 = {} + d["foo"] = n1 + r.body = d + # Update dictionary to contain a local object + n2 = {} + d["foo"] = n2 + self.assertTrue(r.owns_object(n2)) + + def test_dict_clear(self): + # Create Region with Nonempty dictionary + r = Region() + d = {} + n = {} + d["foo"] = n + r.body = d + # Clear dictionary + d.clear() + # As LRC is not checked by the invariant, this test cannot + # check anything useful yet. + + def test_dict_copy(self): + r = Region() + d = {} + r.body = d + r2 = Region() + d["foo"] = r2 + d.copy() + + def test_dict_setdefault(self): + r = Region("outer") + d = {} + r.body = d + r2 = Region("inner") + d["foo"] = r2 + d.setdefault("foo", r2) + self.assertRaises(RegionError, d.setdefault, "bar", r2) + + def test_dict_update(self): + # Create a region containing two dictionaries + r = Region() + d = {} + r.body = d + d2 = {} + r.body2 = d2 + # Add a contained region to the first dictionary + d["reg"] = Region() + # Update the second dictionary to contain the elements of the first + self.assertRaises(RegionError, d2.update, d) + self.assertRaises(RegionError, d2.update, d) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c1084ca1a3ca89..5556795b1d919b 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -122,6 +122,7 @@ As a consequence of this, split keys have a maximum size of 16. #include "pycore_regions.h" // _PyObject_GC_TRACK() #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_regions.h" // Py_ADDREGIONREFERENCE(), ... (region) #include "stringlib/eq.h" // unicode_eq() #include "regions.h" // Py_IsImmutable() @@ -666,6 +667,10 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) static void free_keys_object(PyInterpreterState *interp, PyDictKeysObject *keys) { + // TODO: This feels like it should remove the references in the regions + // but keys is not a Python object, so it's not clear how to do that. + // mjp: Leaving as a TODO for now. + assert(keys != Py_EMPTY_KEYS); if (DK_IS_UNICODE(keys)) { PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); @@ -788,8 +793,12 @@ new_dict_with_shared_keys(PyInterpreterState *interp, PyDictKeysObject *keys) } +/* The target represents the dictionary that this object will become part of. + If target is NULL, the object is not part of a freshly allocated dictionary, so should + be considered as part of te local region. +*/ static PyDictKeysObject * -clone_combined_dict_keys(PyDictObject *orig) +clone_combined_dict_keys(PyDictObject *orig, PyObject* target) { assert(PyDict_Check(orig)); assert(Py_TYPE(orig)->tp_iter == (getiterfunc)dict_iter); @@ -830,6 +839,14 @@ clone_combined_dict_keys(PyDictObject *orig) if (value != NULL) { Py_INCREF(value); Py_INCREF(*pkey); + if (target != NULL) { + if (!Py_REGIONADDREFERENCES(target, *pkey, value)) + return NULL; + } + else { + Py_REGIONADDLOCALREFERENCE(*pkey); + Py_REGIONADDLOCALREFERENCE(value); + } } pvalue += offs; pkey += offs; @@ -1258,6 +1275,9 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, MAINTAIN_TRACKING(mp, key, value); if (ix == DKIX_EMPTY) { + if (!Py_REGIONADDREFERENCES((PyObject*)mp, key, value)) { + goto Fail; + } uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, value); /* Insert into new slot. */ @@ -1303,6 +1323,9 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, } if (old_value != value) { + if (!Py_REGIONADDREFERENCE((PyObject*)mp, value)) { + goto Fail; + } if(DK_IS_UNICODE(mp->ma_keys)){ PyDictUnicodeEntry *ep; ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; @@ -1336,6 +1359,7 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, else { _PyDictEntry_SetValue(DK_ENTRIES(mp->ma_keys) + ix, value); } + Py_REGIONREMOVEREFERENCE((PyObject*)mp, old_value); } mp->ma_version_tag = new_version; } @@ -1365,6 +1389,9 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp, return -1; } + if (!Py_REGIONADDREFERENCES((PyObject*)mp, key, value)) + return -1; + uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, value); @@ -2015,6 +2042,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix, assert(ix < SHARED_KEYS_MAX_SIZE); /* Update order */ delete_index_from_values(mp->ma_values, ix); + Py_REGIONREMOVEREFERENCE(mp, old_value); ASSERT_CONSISTENT(mp); } else { @@ -2181,7 +2209,7 @@ PyDict_Clear(PyObject *op) if (oldvalues != NULL) { n = oldkeys->dk_nentries; for (i = 0; i < n; i++) - Py_CLEAR(oldvalues->values[i]); + Py_CLEAR_OBJECT_FIELD(op, oldvalues->values[i]); free_values(oldvalues); dictkeys_decref(interp, oldkeys); } @@ -2459,6 +2487,8 @@ dict_dealloc(PyDictObject *mp) Py_TRASHCAN_BEGIN(mp, dict_dealloc) if (values != NULL) { for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) { + PyObject *value = values->values[i]; + Py_REGIONREMOVEREFERENCE(mp, value); Py_XDECREF(values->values[i]); } free_values(values); @@ -2939,7 +2969,7 @@ dict_merge(PyInterpreterState *interp, PyObject *a, PyObject *b, int override) USABLE_FRACTION(DK_SIZE(okeys)/2) < other->ma_used)) { uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_CLONED, mp, b, NULL); - PyDictKeysObject *keys = clone_combined_dict_keys(other); + PyDictKeysObject *keys = clone_combined_dict_keys(other, a); // Need to say what owns the keys? if (keys == NULL) { return -1; } @@ -3142,6 +3172,13 @@ PyDict_Copy(PyObject *o) dictkeys_incref(mp->ma_keys); for (size_t i = 0; i < size; i++) { PyObject *value = mp->ma_values->values[i]; + if (!Py_REGIONADDREFERENCE(split_copy, value)) + { + // TODO: is this safe to dealloc the split_copy? + // is it in a valid enough state to be deallocated? + Py_DECREF(split_copy); + return NULL; + } split_copy->ma_values->values[i] = Py_XNewRef(value); } if (_PyObject_GC_IS_TRACKED(mp)) @@ -3167,7 +3204,7 @@ PyDict_Copy(PyObject *o) operations and copied after that. In cases like this, we defer to PyDict_Merge, which produces a compacted copy. */ - PyDictKeysObject *keys = clone_combined_dict_keys(mp); + PyDictKeysObject *keys = clone_combined_dict_keys(mp, NULL); if (keys == NULL) { return NULL; } @@ -3427,6 +3464,8 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) return NULL; if (ix == DKIX_EMPTY) { + if (!Py_REGIONADDREFERENCE(mp, defaultobj)) + return NULL; uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, defaultobj); mp->ma_keys->dk_version = 0; @@ -3467,6 +3506,8 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj) assert(mp->ma_keys->dk_usable >= 0); } else if (value == NULL) { + if (!Py_REGIONADDREFERENCE(mp, defaultobj)) + return NULL; uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_ADDED, mp, key, defaultobj); value = defaultobj; @@ -5507,7 +5548,7 @@ _PyObject_InitializeDict(PyObject *obj) return -1; } PyObject **dictptr = _PyObject_ComputedDictPointer(obj); - Pyrona_ADDREFERENCE(obj, dict); + Py_REGIONADDREFERENCE(obj, dict); *dictptr = dict; return 0; } @@ -5553,11 +5594,20 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, assert(values != NULL); assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT); - if(!Py_CHECKWRITE(obj)){ + if (!Py_CHECKWRITE(obj)){ PyErr_WriteToImmutable(obj); return -1; } + //TODO: PYRONA: The addition of the key is complex here. + // The keys PyDictKeysObject, might already have the key. Note that + // the keys PyDictKeysObject is not a PyObject. So it is unclear where + // this edge is created. + // The keys is coming from ht_cached_keys on the type object. + // This is also interesting from a race condition perspective. + // Can this be shared, should it be treated immutably when the type is? + // mjp: Leaving for a future PR. + Py_ssize_t ix = DKIX_EMPTY; if (PyUnicode_CheckExact(name)) { ix = insert_into_dictkeys(keys, name); @@ -5589,6 +5639,9 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, return PyDict_SetItem(dict, name, value); } } + if (!Py_REGIONADDREFERENCE(obj, value)) { + return -1; + } PyObject *old_value = values->values[ix]; values->values[ix] = Py_XNewRef(value); if (old_value == NULL) { @@ -5604,6 +5657,7 @@ _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, if (value == NULL) { delete_index_from_values(values, ix); } + Py_REGIONREMOVEREFERENCE(obj, old_value); Py_DECREF(old_value); } return 0; @@ -5775,7 +5829,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context) _Py_SetImmutable(dict); } else { - Pyrona_ADDREFERENCE(obj, dict); + Py_REGIONADDREFERENCE(obj, dict); dorv_ptr->dict = dict; } } @@ -5789,7 +5843,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context) _Py_SetImmutable(dict); } else { - Pyrona_ADDREFERENCE(obj, dict); + Py_REGIONADDREFERENCE(obj, dict); dorv_ptr->dict = dict; } } @@ -5817,7 +5871,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context) _Py_SetImmutable(dict); } else { - Pyrona_ADDREFERENCE(obj, dict); + Py_REGIONADDREFERENCE(obj, dict); *dictptr = dict; } } @@ -5841,7 +5895,7 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, if (dict == NULL) { dictkeys_incref(cached); dict = new_dict_with_shared_keys(interp, cached); - Pyrona_ADDREFERENCE(owner, dict); + Py_REGIONADDREFERENCE(owner, dict); if (dict == NULL) return -1; *dictptr = dict; @@ -5850,7 +5904,7 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, res = PyDict_DelItem(dict, key); } else { - if (Pyrona_ADDREFERENCES(dict, key, value)) { + if (Py_REGIONADDREFERENCES(dict, key, value)) { res = PyDict_SetItem(dict, key, value); } else { // Error is set inside ADDREFERENCE @@ -5863,19 +5917,13 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, dict = PyDict_New(); if (dict == NULL) return -1; - Pyrona_ADDREFERENCE(owner, dict); + Py_REGIONADDREFERENCE(owner, dict); *dictptr = dict; } if (value == NULL) { res = PyDict_DelItem(dict, key); } else { - // TODO: remove this once we merge Matt P's changeset to dictionary object - if (Pyrona_ADDREFERENCES(dict, key, value)) { - res = PyDict_SetItem(dict, key, value); - } else { - // Error is set inside ADDREFERENCE - return -1; - } + res = PyDict_SetItem(dict, key, value); } } ASSERT_CONSISTENT(dict); diff --git a/Objects/object.c b/Objects/object.c index b9ce043f6e4140..a9813a999fdb62 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1267,7 +1267,7 @@ _PyObject_GetDictPtr(PyObject *obj) PyErr_Clear(); return NULL; } - Pyrona_ADDREFERENCE(obj, dict); + Py_REGIONADDREFERENCE(obj, dict); dorv_ptr->dict = dict; } return &dorv_ptr->dict; @@ -1467,7 +1467,7 @@ _PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, res = NULL; goto done; } - Pyrona_ADDREFERENCE(obj, dict); + Py_REGIONADDREFERENCE(obj, dict); dorv_ptr->dict = dict; } } diff --git a/Objects/regions.c b/Objects/regions.c index b1451e59f6647e..7f138973965bd3 100644 --- a/Objects/regions.c +++ b/Objects/regions.c @@ -44,7 +44,7 @@ static regionmetadata* regionmetadata_get_parent(regionmetadata* self); static PyObject *PyRegion_add_object(PyRegionObject *self, PyObject *args); static PyObject *PyRegion_remove_object(PyRegionObject *self, PyObject *args); static const char *get_region_name(PyObject* obj); -static void _PyErr_Region(PyObject *tgt, PyObject *new_ref, const char *msg); +static void _PyErr_Region(PyObject *src, PyObject *tgt, const char *msg); /** * Global status for performing the region check. @@ -1886,14 +1886,14 @@ PyTypeObject PyRegion_Type = { PyType_GenericNew, /* tp_new */ }; -void _PyErr_Region(PyObject *tgt, PyObject *new_ref, const char *msg) { - const char *new_ref_region_name = get_region_name(new_ref); +void _PyErr_Region(PyObject *src, PyObject *tgt, const char *msg) { const char *tgt_region_name = get_region_name(tgt); + const char *src_region_name = get_region_name(src); + PyObject *src_type_repr = PyObject_Repr(PyObject_Type(src)); + const char *src_desc = src_type_repr ? PyUnicode_AsUTF8(src_type_repr) : "<>"; PyObject *tgt_type_repr = PyObject_Repr(PyObject_Type(tgt)); const char *tgt_desc = tgt_type_repr ? PyUnicode_AsUTF8(tgt_type_repr) : "<>"; - PyObject *new_ref_type_repr = PyObject_Repr(PyObject_Type(new_ref)); - const char *new_ref_desc = new_ref_type_repr ? PyUnicode_AsUTF8(new_ref_type_repr) : "<>"; - PyErr_Format(PyExc_RuntimeError, "Error: Invalid edge %p (%s in %s) -> %p (%s in %s) %s\n", tgt, tgt_desc, tgt_region_name, new_ref, new_ref_desc, new_ref_region_name, msg); + PyErr_Format(PyExc_RuntimeError, "Error: Invalid edge %p (%s in %s) -> %p (%s in %s) %s\n", src, src_desc, src_region_name, tgt, tgt_desc, tgt_region_name, msg); } static const char *get_region_name(PyObject* obj) { @@ -1912,7 +1912,7 @@ static const char *get_region_name(PyObject* obj) { } // x.f = y ==> _Pyrona_AddReference(src=x, tgt=y) -bool _Pyrona_AddReference(PyObject *src, PyObject *tgt) { +bool _Py_RegionAddReference(PyObject *src, PyObject *tgt) { if (Py_REGION(src) == Py_REGION(tgt)) { // Nothing to do -- intra-region references are always permitted return true; @@ -1924,8 +1924,9 @@ bool _Pyrona_AddReference(PyObject *src, PyObject *tgt) { } if (_Py_IsLocal(src)) { - // Slurp emphemerally owned object into the region of the target object + // Record the borrowed reference in the LRC of the target region // _Py_VPYDBG("Added borrowed ref %p --> %p (owner: '%s')\n", tgt, new_ref, get_region_name(tgt)); + Py_REGION_DATA(tgt)->lrc += 1; return true; } @@ -1934,13 +1935,25 @@ bool _Pyrona_AddReference(PyObject *src, PyObject *tgt) { return add_to_region(tgt, Py_REGION(src)) == Py_None; } +// Used to add a reference from a local object that might not have been created yet +// to tgt. +void _Py_RegionAddLocalReference(PyObject *tgt) { + // Only need to increment the LRC of the target region + // if it is not local, immutable, or a cown. + if (_Py_IsLocal(tgt) || Py_IsImmutable(tgt) || _Py_IsCown(tgt)) { + return; + } + + Py_REGION_DATA(tgt)->lrc += 1; +} + // Convenience function for moving multiple references into tgt at once -bool _Pyrona_AddReferences(PyObject *tgt, int new_refc, ...) { +bool _Py_RegionAddReferences(PyObject *src, int tgtc, ...) { va_list args; - va_start(args, new_refc); + va_start(args, tgtc); - for (int i = 0; i < new_refc; i++) { - int res = _Pyrona_AddReference(tgt, va_arg(args, PyObject*)); + for (int i = 0; i < tgtc; i++) { + int res = _Py_RegionAddReference(src, va_arg(args, PyObject*)); if (!res) return false; } @@ -1954,3 +1967,39 @@ void _PyRegion_set_cown_parent(PyObject* bridge, PyObject* cown) { Py_XINCREF(cown); Py_XSETREF(data->cown, cown); } + +void _Py_RegionRemoveReference(PyObject *src, PyObject *tgt) { + if (Py_REGION(src) == Py_REGION(tgt)) { + // Nothing to do -- intra-region references have no accounting. + return; + } + + // If the target is local, then so must the source be. So this should + // be covered by the previous check. + assert(!_Py_IsLocal(tgt)); + + if (_Py_IsImmutable(tgt) || _Py_IsCown(tgt)) { + // Nothing to do -- removing a ref to an immutable or a cown has no additional accounting. + return; + } + + regionmetadata* tgt_md = Py_REGION_DATA(tgt); + if (_Py_IsLocal(src)) { + // Dec LRC of the previously referenced region + // TODO should this decrement be a function, if it hits zero, + // then a region could become unreachable. + tgt_md->lrc -= 1; + return; + } + + // This must be a parent reference, so we need to remove the parent reference. + regionmetadata* src_md = Py_REGION_DATA(src); + regionmetadata* tgt_parent_md = REGION_DATA_CAST(Py_region_ptr(tgt_md->parent)); + if (tgt_parent_md != src_md) { + // TODO: Could `dirty` mean this isn't an error? + _PyErr_Region(src, tgt, "(in WB/remove_ref)"); + } + + // Unparent the region. + regionmetadata_set_parent(tgt_md, NULL); +}