From b75ae27a95da59157aa5a5b5d94ab2e5f2122d8a Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Tue, 16 May 2023 03:02:29 +0200 Subject: [PATCH 1/2] Track HashTableIterators for copy-on-write copies of HashTables When executing a foreach ($ht as &$ref), foreach calls zend_hash_iterator_pos_ex() on every iteration. If the HashTable contained in the $ht variable is not the tracked HashTable, it will reset the position to the internal array pointer of the array currently in $ht. This behaviour is generally fine, but undesirable for copy-on-write copies of the iterated HashTable. This may trivially occur when the iterated over HashTable is assigned to some variable, then the iterated over variable modified, leading to array separation, changing the HashTable pointer in the variable. Thus foreach happily restarting iteration. This behaviour (despite existing since PHP 7.0) is considered a bug, if not only for the behaviour being unexpected to the user, also copy-on-write should not have trivially observable side-effects by mere assignment. The bugfix consists of duplicating HashTableIterators whenever zend_array_dup() is called (the primitive used on array separation). When a further access to the HashPosition through the HashTableIterators API happens and the HashTable does not match the tracked one, all the duplicates (which are tracked by single linked list) are searched for the wanted HashTable. If found, the HashTableIterator is replaced by the found copy and all other copies are removed. This ensures that we always end up tracking the correct HashTable. Fixes GH-11244. Signed-off-by: Bob Weinand --- Zend/tests/gh11222.phpt | 2 +- Zend/tests/gh11244-001.phpt | 22 +++++++ Zend/tests/gh11244-002.phpt | 22 +++++++ Zend/tests/gh11244-003.phpt | 23 ++++++++ Zend/tests/gh11244-004.phpt | 18 ++++++ Zend/tests/gh11244-005.phpt | 48 +++++++++++++++ Zend/zend_hash.c | 114 +++++++++++++++++++++++++++++++++--- Zend/zend_types.h | 1 + 8 files changed, 241 insertions(+), 9 deletions(-) create mode 100644 Zend/tests/gh11244-001.phpt create mode 100644 Zend/tests/gh11244-002.phpt create mode 100644 Zend/tests/gh11244-003.phpt create mode 100644 Zend/tests/gh11244-004.phpt create mode 100644 Zend/tests/gh11244-005.phpt diff --git a/Zend/tests/gh11222.phpt b/Zend/tests/gh11222.phpt index c2c2b5eb4881a..5f4eedf8d4b95 100644 --- a/Zend/tests/gh11222.phpt +++ b/Zend/tests/gh11222.phpt @@ -1,5 +1,5 @@ --TEST-- -GH-112222: foreach by-ref may jump over keys during a rehash +GH-11222: foreach by-ref may jump over keys during a rehash --FILE-- &$value) { + echo "$value\n"; + if ($value === 1) { + $cow_copy = $data; + echo "unset $value\n"; + unset($data[$key]); + } +} + +?> +--EXPECTF-- +0 +1 +unset 1 +2 diff --git a/Zend/tests/gh11244-002.phpt b/Zend/tests/gh11244-002.phpt new file mode 100644 index 0000000000000..0b48e3090fced --- /dev/null +++ b/Zend/tests/gh11244-002.phpt @@ -0,0 +1,22 @@ +--TEST-- +GH-11244: Modifying a copied by-ref iterated array resets the array position (not packed) +--FILE-- + 0, 1, 2]; + +foreach ($data as $key => &$value) { + echo "$value\n"; + if ($value === 1) { + $cow_copy = $data; + echo "unset $value\n"; + unset($data[$key]); + } +} + +?> +--EXPECTF-- +0 +1 +unset 1 +2 diff --git a/Zend/tests/gh11244-003.phpt b/Zend/tests/gh11244-003.phpt new file mode 100644 index 0000000000000..d368ad7949643 --- /dev/null +++ b/Zend/tests/gh11244-003.phpt @@ -0,0 +1,23 @@ +--TEST-- +GH-11244: Modifying a copied by-ref iterated array resets the array position (not packed with holes) +--FILE-- + 0, 1, 2, 3]; +unset($data[1]); + +foreach ($data as $key => &$value) { + echo "$value\n"; + if ($value === 1) { + $cow_copy = $data; + echo "unset $value\n"; + unset($data[$key]); + } +} + +?> +--EXPECTF-- +0 +1 +unset 1 +3 diff --git a/Zend/tests/gh11244-004.phpt b/Zend/tests/gh11244-004.phpt new file mode 100644 index 0000000000000..6b3a5768ae0b6 --- /dev/null +++ b/Zend/tests/gh11244-004.phpt @@ -0,0 +1,18 @@ +--TEST-- +GH-11244: Modifying a copied by-ref iterated array resets the array position (with object) +--FILE-- + $v) { + echo "$p : $v\n"; + $clone = clone $obj; + $ref = &$obj->$p; +} + +?> +--EXPECTF-- +0 : 1 +1 : 2 +2 : 3 diff --git a/Zend/tests/gh11244-005.phpt b/Zend/tests/gh11244-005.phpt new file mode 100644 index 0000000000000..2cbbfd82ca503 --- /dev/null +++ b/Zend/tests/gh11244-005.phpt @@ -0,0 +1,48 @@ +--TEST-- +GH-11244: Modifying a copied by-ref iterated array resets the array position (multiple copies) +--FILE-- + &$value) { + echo "$value\n"; + if ($value === 1) { + $cow_copy = [$data, $data, $data]; + echo "unset $value\n"; + unset($cow_copy[0][$key]); + unset($data[$key]); + unset($cow_copy[2][$key]); + } +} + +print_r($cow_copy); + +?> +--EXPECTF-- +0 +1 +unset 1 +2 +Array +( + [0] => Array + ( + [0] => 0 + [2] => 2 + ) + + [1] => Array + ( + [0] => 0 + [1] => 1 + [2] => 2 + ) + + [2] => Array + ( + [0] => 0 + [2] => 2 + ) + +) diff --git a/Zend/zend_hash.c b/Zend/zend_hash.c index a8571af75c906..bb4098a00c0aa 100644 --- a/Zend/zend_hash.c +++ b/Zend/zend_hash.c @@ -514,6 +514,21 @@ ZEND_API HashPosition ZEND_FASTCALL zend_hash_get_current_pos(const HashTable *h return _zend_hash_get_current_pos(ht); } +static void zend_hash_remove_iterator_copies(uint32_t idx) { + HashTableIterator *iterators = EG(ht_iterators); + + HashTableIterator *iter = iterators + idx; + uint32_t next_idx = iter->next_copy; + while (next_idx != idx) { + uint32_t cur_idx = next_idx; + HashTableIterator *cur_iter = iterators + cur_idx; + next_idx = cur_iter->next_copy; + cur_iter->next_copy = cur_idx; // avoid recursion in zend_hash_iterator_del + zend_hash_iterator_del(cur_idx); + } + iter->next_copy = idx; +} + ZEND_API uint32_t ZEND_FASTCALL zend_hash_iterator_add(HashTable *ht, HashPosition pos) { HashTableIterator *iter = EG(ht_iterators); @@ -528,6 +543,7 @@ ZEND_API uint32_t ZEND_FASTCALL zend_hash_iterator_add(HashTable *ht, HashPositi iter->ht = ht; iter->pos = pos; idx = iter - EG(ht_iterators); + iter->next_copy = idx; if (idx + 1 > EG(ht_iterators_used)) { EG(ht_iterators_used) = idx + 1; } @@ -547,16 +563,49 @@ ZEND_API uint32_t ZEND_FASTCALL zend_hash_iterator_add(HashTable *ht, HashPositi iter->pos = pos; memset(iter + 1, 0, sizeof(HashTableIterator) * 7); idx = iter - EG(ht_iterators); + iter->next_copy = idx; EG(ht_iterators_used) = idx + 1; return idx; } +// To avoid losing track of the HashTable when separating arrays, we track all copies at once. +static zend_always_inline bool zend_hash_iterator_find_copy_pos(uint32_t idx, HashTable *ht) { + HashTableIterator *iter = EG(ht_iterators) + idx; + + uint32_t next_idx = iter->next_copy; + if (EXPECTED(next_idx != idx)) { + HashTableIterator *copy_iter; + while (next_idx != idx) { + copy_iter = EG(ht_iterators) + next_idx; + if (copy_iter->ht == ht) { + // We have found the hashtable we are actually iterating over + // Now clean any intermittent copies and replace the original index by the found one + if (EXPECTED(iter->ht) && EXPECTED(iter->ht != HT_POISONED_PTR) + && EXPECTED(!HT_ITERATORS_OVERFLOW(iter->ht))) { + HT_DEC_ITERATORS_COUNT(iter->ht); + } + if (EXPECTED(!HT_ITERATORS_OVERFLOW(ht))) { + HT_INC_ITERATORS_COUNT(ht); + } + iter->ht = copy_iter->ht; + iter->pos = copy_iter->pos; + zend_hash_remove_iterator_copies(idx); + return true; + } + next_idx = copy_iter->next_copy; + } + zend_hash_remove_iterator_copies(idx); + } + + return false; +} + ZEND_API HashPosition ZEND_FASTCALL zend_hash_iterator_pos(uint32_t idx, HashTable *ht) { HashTableIterator *iter = EG(ht_iterators) + idx; ZEND_ASSERT(idx != (uint32_t)-1); - if (UNEXPECTED(iter->ht != ht)) { + if (UNEXPECTED(iter->ht != ht) && !zend_hash_iterator_find_copy_pos(idx, ht)) { if (EXPECTED(iter->ht) && EXPECTED(iter->ht != HT_POISONED_PTR) && EXPECTED(!HT_ITERATORS_OVERFLOW(iter->ht))) { HT_DEC_ITERATORS_COUNT(iter->ht); @@ -576,7 +625,7 @@ ZEND_API HashPosition ZEND_FASTCALL zend_hash_iterator_pos_ex(uint32_t idx, zval HashTableIterator *iter = EG(ht_iterators) + idx; ZEND_ASSERT(idx != (uint32_t)-1); - if (UNEXPECTED(iter->ht != ht)) { + if (UNEXPECTED(iter->ht != ht) && !zend_hash_iterator_find_copy_pos(idx, ht)) { if (EXPECTED(iter->ht) && EXPECTED(iter->ht != HT_POISONED_PTR) && EXPECTED(!HT_ITERATORS_OVERFLOW(ht))) { HT_DEC_ITERATORS_COUNT(iter->ht); @@ -605,6 +654,10 @@ ZEND_API void ZEND_FASTCALL zend_hash_iterator_del(uint32_t idx) } iter->ht = NULL; + if (UNEXPECTED(iter->next_copy != idx)) { + zend_hash_remove_iterator_copies(idx); + } + if (idx == EG(ht_iterators_used) - 1) { while (idx > 0 && EG(ht_iterators)[idx - 1].ht == NULL) { idx--; @@ -2286,6 +2339,22 @@ static zend_always_inline bool zend_array_dup_element(HashTable *source, HashTab return 1; } +// We need to duplicate iterators to be able to search through all copy-on-write copies to find the actually iterated HashTable and position back +static void zend_array_dup_ht_iterators(HashTable *source, HashTable *target) { + HashTableIterator *iter = EG(ht_iterators); + HashTableIterator *end = iter + EG(ht_iterators_used); + + while (iter != end) { + if (iter->ht == source) { + uint32_t copy_idx = zend_hash_iterator_add(target, iter->pos); + HashTableIterator *copy_iter = EG(ht_iterators) + copy_idx; + copy_iter->next_copy = iter->next_copy; + iter->next_copy = copy_idx; + } + iter++; + } +} + static zend_always_inline void zend_array_dup_packed_elements(HashTable *source, HashTable *target, bool with_holes) { zval *p = source->arPacked; @@ -2300,6 +2369,10 @@ static zend_always_inline void zend_array_dup_packed_elements(HashTable *source, } p++; q++; } while (p != end); + + if (UNEXPECTED(HT_HAS_ITERATORS(source))) { + zend_array_dup_ht_iterators(source, target); + } } static zend_always_inline uint32_t zend_array_dup_elements(HashTable *source, HashTable *target, bool static_keys, bool with_holes) @@ -2309,19 +2382,44 @@ static zend_always_inline uint32_t zend_array_dup_elements(HashTable *source, Ha Bucket *q = target->arData; Bucket *end = p + source->nNumUsed; + if (UNEXPECTED(HT_HAS_ITERATORS(source))) { + zend_array_dup_ht_iterators(source, target); + } + do { if (!zend_array_dup_element(source, target, idx, p, q, 0, static_keys, with_holes)) { uint32_t target_idx = idx; idx++; p++; - while (p != end) { - if (zend_array_dup_element(source, target, target_idx, p, q, 0, static_keys, with_holes)) { - if (source->nInternalPointer == idx) { - target->nInternalPointer = target_idx; + if (EXPECTED(!HT_HAS_ITERATORS(target))) { + while (p != end) { + if (zend_array_dup_element(source, target, target_idx, p, q, 0, static_keys, with_holes)) { + if (source->nInternalPointer == idx) { + target->nInternalPointer = target_idx; + } + target_idx++; q++; + } + idx++; p++; + } + } else { + target->nNumUsed = source->nNumOfElements; + uint32_t iter_pos = zend_hash_iterators_lower_pos(target, idx); + + while (p != end) { + if (zend_array_dup_element(source, target, target_idx, p, q, 0, static_keys, with_holes)) { + if (source->nInternalPointer == idx) { + target->nInternalPointer = target_idx; + } + if (UNEXPECTED(idx >= iter_pos)) { + do { + zend_hash_iterators_update(target, iter_pos, target_idx); + iter_pos = zend_hash_iterators_lower_pos(target, iter_pos + 1); + } while (iter_pos < idx); + } + target_idx++; q++; } - target_idx++; q++; + idx++; p++; } - idx++; p++; } return target_idx; } diff --git a/Zend/zend_types.h b/Zend/zend_types.h index c341ffa0b4d8c..8fc713ef02fae 100644 --- a/Zend/zend_types.h +++ b/Zend/zend_types.h @@ -533,6 +533,7 @@ typedef uint32_t HashPosition; typedef struct _HashTableIterator { HashTable *ht; HashPosition pos; + uint32_t next_copy; } HashTableIterator; struct _zend_object { From b740dfd6668ffea4efd8dfa4e7e0136389fae4bf Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Tue, 16 May 2023 12:35:54 +0200 Subject: [PATCH 2/2] Address CR comments Signed-off-by: Bob Weinand --- Zend/zend_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Zend/zend_types.h b/Zend/zend_types.h index 8fc713ef02fae..301c174d46dcc 100644 --- a/Zend/zend_types.h +++ b/Zend/zend_types.h @@ -533,7 +533,7 @@ typedef uint32_t HashPosition; typedef struct _HashTableIterator { HashTable *ht; HashPosition pos; - uint32_t next_copy; + uint32_t next_copy; // circular linked list via index into EG(ht_iterators) } HashTableIterator; struct _zend_object {