@@ -39,7 +39,7 @@
 /*
  * Only three distinct values are used for reclamation, but reclamation occurs
- * at e + 2 rather than e + 1. Any thread in a "critical section" would have
+ * at e+2 rather than e+1. Any thread in a "critical section" would have
  * acquired some snapshot (e) of the global epoch value (e_g) and set an active
  * flag. Any hazardous references will only occur after a full memory barrier.
  * For example, assume an initial e_g value of 1, e value of 0 and active value
@@ -50,24 +50,22 @@
  *   active = 1
  *   memory_barrier();
  *
- * Any serialized reads may observe e = 0 or e = 1 with active = 0, or
- * e = 0 or e = 1 with active = 1. The e_g value can only go from 1
- * to 2 if every thread has already observed the value of "1" (or the
- * value we are incrementing from). This guarantees us that for any
- * given value e_g, any threads with-in critical sections (referred
- * to as "active" threads from here on) would have an e value of
- * e_g - 1 or e_g. This also means that hazardous references may be
- * shared in both e_g - 1 and e_g even if they are logically deleted
- * in e_g.
+ * Any serialized reads may observe e = 0 or e = 1 with active = 0, or e = 0 or
+ * e = 1 with active = 1. The e_g value can only go from 1 to 2 if every thread
+ * has already observed the value of "1" (or the value we are incrementing
+ * from). This guarantees us that for any given value e_g, any threads within
+ * critical sections (referred to as "active" threads from here on) would have
+ * an e value of e_g-1 or e_g. This also means that hazardous references may be
+ * shared in both e_g-1 and e_g even if they are logically deleted in e_g.
  *
- * For example, assume all threads have an e value of e_g. Another
- * thread may increment to e_g to e_g + 1. Older threads may have
- * a reference to an object which is only deleted in e_g + 1. It
- * could be that reader threads are executing some hash table look-ups,
- * while some other writer thread (which causes epoch counter tick)
- * actually deletes the same items that reader threads are looking
- * up (this writer thread having an e value of e_g + 1). This is possible
- * if the writer thread re-observes the epoch after the counter tick.
+ * For example, assume all threads have an e value of e_g. Another thread may
+ * increment e_g to e_g+1. Older threads may have a reference to an object
+ * which is only deleted in e_g+1. It could be that reader threads are
+ * executing some hash table look-ups, while some other writer thread (which
+ * causes epoch counter tick) actually deletes the same items that reader
+ * threads are looking up (this writer thread having an e value of e_g+1).
+ * This is possible if the writer thread re-observes the epoch after the
+ * counter tick.
  *
  * Pseudo-code for writer:
  *   ck_epoch_begin()
@@ -83,49 +81,48 @@
  *     ck_pr_inc(&x->value);
  *   }
  *
- * Of course, it is also possible for references logically deleted
- * at e_g - 1 to still be accessed at e_g as threads are "active"
- * at the same time (real-world time) mutating shared objects.
+ * Of course, it is also possible for references logically deleted at e_g-1 to
+ * still be accessed at e_g as threads are "active" at the same time
+ * (real-world time) mutating shared objects.
  *
- * Now, if the epoch counter is ticked to e_g + 1, then no new
- * hazardous references could exist to objects logically deleted at
- * e_g - 1. The reason for this is that at e_g + 1, all epoch read-side
- * critical sections started at e_g - 1 must have been completed. If
- * any epoch read-side critical sections at e_g - 1 were still active,
- * then we would never increment to e_g + 1 (active != 0 ^ e != e_g).
- * Additionally, e_g may still have hazardous references to objects
- * logically deleted at e_g - 1 which means objects logically deleted
- * at e_g - 1 cannot be deleted at e_g + 1 unless all threads have
- * observed e_g + 1 (since it is valid for active threads to be at e_g
- * and threads at e_g still require safe memory accesses).
+ * Now, if the epoch counter is ticked to e_g+1, then no new hazardous
+ * references could exist to objects logically deleted at e_g-1. The reason for
+ * this is that at e_g+1, all epoch read-side critical sections started at
+ * e_g-1 must have been completed. If any epoch read-side critical sections at
+ * e_g-1 were still active, then we would never increment to e_g+1 (active != 0
+ * ^ e != e_g). Additionally, e_g may still have hazardous references to
+ * objects logically deleted at e_g-1 which means objects logically deleted at
+ * e_g-1 cannot be deleted at e_g+1 unless all threads have observed e_g+1
+ * (since it is valid for active threads to be at e_g and threads at e_g still
+ * require safe memory accesses).
  *
- * However, at e_g + 2, all active threads must be either at e_g + 1 or
- * e_g + 2. Though e_g + 2 may share hazardous references with e_g + 1,
- * and e_g + 1 shares hazardous references to e_g, no active threads are
- * at e_g or e_g - 1. This means no hazardous references could exist to
- * objects deleted at e_g - 1 (at e_g + 2).
+ * However, at e_g+2, all active threads must be either at e_g+1 or e_g+2.
+ * Though e_g+2 may share hazardous references with e_g+1, and e_g+1 shares
+ * hazardous references to e_g, no active threads are at e_g or e_g-1. This
+ * means no hazardous references could exist to objects deleted at e_g-1 (at
+ * e_g+2).
  *
  * To summarize these important points,
- *   1) Active threads will always have a value of e_g or e_g - 1.
- *   2) Items that are logically deleted e_g or e_g - 1 cannot be
- *      physically deleted.
- *   3) Objects logically deleted at e_g - 1 can be physically destroyed
- *      at e_g + 2 or at e_g + 1 if no threads are at e_g.
+ *   1) Active threads will always have a value of e_g or e_g-1.
+ *   2) Items that are logically deleted e_g or e_g-1 cannot be physically
+ *      deleted.
+ *   3) Objects logically deleted at e_g-1 can be physically destroyed at e_g+2
+ *      or at e_g+1 if no threads are at e_g.
  *
- * Last but not least, if we are at e_g + 2, then no active thread is at
- * e_g which means it is safe to apply modulo-3 arithmetic to e_g value
- * in order to re-use e_g to represent the e_g + 3 state. This means it is
- * sufficient to represent e_g using only the values 0, 1 or 2. Every time
- * a thread re-visits a e_g (which can be determined with a non-empty deferral
- * list) it can assume objects in the e_g deferral list involved at least
- * three e_g transitions and are thus, safe, for physical deletion.
+ * Last but not least, if we are at e_g+2, then no active thread is at e_g
+ * which means it is safe to apply modulo-3 arithmetic to e_g value in order to
+ * re-use e_g to represent the e_g+3 state. This means it is sufficient to
+ * represent e_g using only the values 0, 1 or 2. Every time a thread re-visits
+ * an e_g (which can be determined with a non-empty deferral list) it can
+ * assume objects in the e_g deferral list involved at least three e_g
+ * transitions and are thus safe for physical deletion.
  *
  * Blocking semantics for epoch reclamation have additional restrictions.
  * Though we only require three deferral lists, reasonable blocking semantics
  * must be able to more gracefully handle bursty write work-loads which could
  * easily cause e_g wrap-around if modulo-3 arithmetic is used. This allows for
- * easy-to-trigger live-lock situations. The work-around to this is to not apply
- * modulo arithmetic to e_g but only to deferral list indexing.
+ * easy-to-trigger live-lock situations. The work-around to this is to not
+ * apply modulo arithmetic to e_g but only to deferral list indexing.
  */

 #define CK_EPOCH_GRACE 3U
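/*
 * The reclamation rule spelled out in the comment above can be condensed into
 * a small stand-alone model. This is an illustrative sketch, not the ck_epoch
 * implementation: the epoch_record, records and epoch_try_advance names are
 * made up, and no atomics are shown. The point it demonstrates is that the
 * global epoch may only tick once every registered thread is either inactive
 * or has already observed the current value, which is exactly why an active
 * thread can lag the global epoch by at most one generation and why two
 * successful ticks form a grace period.
 */
#include <stdbool.h>

#define N_THREADS 8

struct epoch_record {
        unsigned int active;    /* Non-zero while inside a critical section. */
        unsigned int epoch;     /* Snapshot (e) of the global epoch. */
};

static struct epoch_record records[N_THREADS];
static unsigned int global_epoch = 1;   /* e_g */

/* Attempt to tick e_g; fails while any active thread has not observed e_g. */
static bool
epoch_try_advance(void)
{
        unsigned int i;

        for (i = 0; i < N_THREADS; i++) {
                struct epoch_record *r = &records[i];

                if (r->active != 0 && r->epoch != global_epoch)
                        return false;
        }

        global_epoch = global_epoch + 1;
        return true;
}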
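/*
 * A companion sketch for the deferral-list indexing described in the last
 * paragraph of the comment (again illustrative, not ck_epoch itself; the
 * deferred, pending, epoch_retire and epoch_collect names are made up). The
 * epoch value passed in is never reduced; only the list index is taken modulo
 * CK_EPOCH_GRACE. Re-visiting an index whose list is non-empty therefore
 * implies at least three epoch transitions, so everything on it is safe to
 * destroy.
 */
#include <stddef.h>

struct deferred {
        struct deferred *next;
        void (*dtor)(struct deferred *);
};

static struct deferred *pending[CK_EPOCH_GRACE];

/* Logically delete an object while the epoch value is e. */
static void
epoch_retire(unsigned int e, struct deferred *d)
{

        d->next = pending[e % CK_EPOCH_GRACE];
        pending[e % CK_EPOCH_GRACE] = d;
}

/* Physically destroy objects retired three or more generations before e. */
static void
epoch_collect(unsigned int e)
{
        struct deferred *d, *next;

        for (d = pending[e % CK_EPOCH_GRACE]; d != NULL; d = next) {
                next = d->next;
                d->dtor(d);
        }

        pending[e % CK_EPOCH_GRACE] = NULL;
}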
@@ -164,7 +161,8 @@ ck_epoch_recycle(struct ck_epoch *global)
                 if (ck_pr_load_uint(&record->state) == CK_EPOCH_STATE_FREE) {
                         /* Serialize with respect to deferral list clean-up. */
                         ck_pr_fence_load();
-                        state = ck_pr_fas_uint(&record->state, CK_EPOCH_STATE_USED);
+                        state = ck_pr_fas_uint(&record->state,
+                            CK_EPOCH_STATE_USED);
                         if (state == CK_EPOCH_STATE_FREE) {
                                 ck_pr_dec_uint(&global->n_free);
                                 return record;
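/*
 * The load/fence/fas sequence above is what makes it safe to adopt a record
 * that a dead thread left in the FREE state. A common caller-side pattern is
 * sketched below; it assumes ck_epoch_recycle() and ck_epoch_register() are
 * exposed by ck_epoch.h with the same (global, record) shape used throughout
 * this file, and my_acquire_record is a made-up name.
 */
#include <stdlib.h>
#include <ck_epoch.h>

static struct ck_epoch_record *
my_acquire_record(struct ck_epoch *global)
{
        struct ck_epoch_record *record;

        /* Prefer adopting a record released by a thread that has exited. */
        record = ck_epoch_recycle(global);
        if (record != NULL)
                return record;

        record = malloc(sizeof *record);
        if (record == NULL)
                return NULL;

        ck_epoch_register(global, record);
        return record;
}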
@@ -264,7 +262,8 @@ ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e)
         ck_stack_init(&record->pending[epoch]);

         for (cursor = head; cursor != NULL; cursor = next) {
-                struct ck_epoch_entry *entry = ck_epoch_entry_container(cursor);
+                struct ck_epoch_entry *entry =
+                    ck_epoch_entry_container(cursor);

                 next = CK_STACK_NEXT(cursor);
                 entry->function(entry);
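/*
 * ck_epoch_dispatch() above hands the callback nothing but the embedded
 * ck_epoch_entry, so a deferred destructor recovers its enclosing object
 * itself. A sketch of that shape follows; struct node and node_destroy are
 * hypothetical, and the container-of step is written out with offsetof rather
 * than any CK helper. The entry and this function are what a writer would
 * hand to ck_epoch_call() when the node is logically deleted.
 */
#include <stddef.h>
#include <stdlib.h>
#include <ck_epoch.h>

struct node {
        int key;
        struct ck_epoch_entry epoch_entry;
};

static void
node_destroy(struct ck_epoch_entry *entry)
{
        struct node *n;

        /* Recover the node embedding this deferral entry, then free it. */
        n = (struct node *)((char *)entry -
            offsetof(struct node, epoch_entry));
        free(n);
}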
@@ -304,9 +303,10 @@ ck_epoch_synchronize(struct ck_epoch *global, struct ck_epoch_record *record)
         bool active;

         /*
-         * Technically, we are vulnerable to an overflow in presence of multiple
-         * writers. Realistically, this will require 2^32 scans. You can use
-         * epoch-protected sections on the writer-side if this is a concern.
+         * Technically, we are vulnerable to an overflow in presence of
+         * multiple writers. Realistically, this will require 2^32 scans. You
+         * can use epoch-protected sections on the writer-side if this is a
+         * concern.
          */
         delta = epoch = ck_pr_load_uint(&global->epoch);
         goal = epoch + CK_EPOCH_GRACE;
@@ -319,15 +319,18 @@ ck_epoch_synchronize(struct ck_epoch *global, struct ck_epoch_record *record)

         for (i = 0, cr = NULL; i < CK_EPOCH_GRACE - 1; cr = NULL, i++) {
                 /*
-                 * Determine whether all threads have observed the current epoch.
-                 * We can get away without a fence here.
+                 * Determine whether all threads have observed the current
+                 * epoch. We can get away without a fence here.
                  */
                 while (cr = ck_epoch_scan(global, cr, delta, &active), cr != NULL) {
                         unsigned int e_d;

                         ck_pr_stall();

-                        /* Another writer may have already observed a grace period. */
+                        /*
+                         * Another writer may have already observed a grace
+                         * period.
+                         */
                         e_d = ck_pr_load_uint(&global->epoch);
                         if (e_d != delta) {
                                 delta = e_d;
@@ -347,13 +350,14 @@ ck_epoch_synchronize(struct ck_epoch *global, struct ck_epoch_record *record)
                  * increment operations for synchronization that occurs for the
                  * same global epoch value snapshot.
                  *
-                 * If we can guarantee there will only be one active barrier
-                 * or epoch tick at a given time, then it is sufficient to
-                 * use an increment operation. In a multi-barrier workload,
-                 * however, it is possible to overflow the epoch value if we
-                 * apply modulo-3 arithmetic.
+                 * If we can guarantee there will only be one active barrier or
+                 * epoch tick at a given time, then it is sufficient to use an
+                 * increment operation. In a multi-barrier workload, however,
+                 * it is possible to overflow the epoch value if we apply
+                 * modulo-3 arithmetic.
                  */
-                if (ck_pr_cas_uint_value(&global->epoch, delta, delta + 1, &delta) == true) {
+                if (ck_pr_cas_uint_value(&global->epoch, delta, delta + 1,
+                    &delta) == true) {
                         delta = delta + 1;
                         continue;
                 }
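/*
 * The compare-and-swap above exists so that several synchronize() callers
 * racing on the same snapshot advance the epoch only once: a blind increment
 * per caller could push the counter several generations ahead of what any of
 * them observed. A minimal sketch of the same CAS-or-adopt idiom, using the
 * ck_pr_cas_uint_value() primitive already used in this file
 * (epoch_tick_once is a made-up name):
 */
#include <ck_pr.h>

static void
epoch_tick_once(unsigned int *epoch, unsigned int *snapshot)
{
        unsigned int s = *snapshot;

        /* On failure, *snapshot holds the newer value another writer set. */
        if (ck_pr_cas_uint_value(epoch, s, s + 1, snapshot) == true)
                *snapshot = s + 1;
}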
@@ -361,11 +365,12 @@ ck_epoch_synchronize(struct ck_epoch *global, struct ck_epoch_record *record)
 reload:
         if ((goal > epoch) & (delta >= goal)) {
                 /*
-                 * Right now, epoch overflow is handled as an edge case. If
-                 * we have already observed an epoch generation, then we can
-                 * be sure no hazardous references exist to objects from this
-                 * generation. We can actually avoid an addtional scan step
-                 * at this point.
+                 * Right now, epoch overflow is handled as an edge
+                 * case. If we have already observed an epoch
+                 * generation, then we can be sure no hazardous
+                 * references exist to objects from this generation. We
+                 * can actually avoid an additional scan step at this
+                 * point.
                  */
                 break;
         }
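/*
 * A worked example of the (goal > epoch) half of the test above, assuming the
 * usual 32-bit unsigned int (epoch_goal_example is a made-up name). In the
 * common case the goal does not wrap and the fast-path break fires once the
 * observed delta reaches it; near UINT_MAX the goal wraps around, goal > epoch
 * becomes false, and the wrap-around handling that follows this hunk runs
 * instead.
 */
#include <assert.h>

static void
epoch_goal_example(void)
{
        unsigned int epoch, goal;

        /* Common case: 10 + 3 = 13, so the fast path is eligible. */
        epoch = 10U;
        goal = epoch + CK_EPOCH_GRACE;
        assert(goal > epoch && goal == 13U);

        /* Wrap-around: 0xfffffffe + 3 wraps to 1, disabling the fast path. */
        epoch = 0xfffffffeU;
        goal = epoch + CK_EPOCH_GRACE;
        assert(goal < epoch && goal == 1U);
}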
@@ -386,8 +391,8 @@ ck_epoch_barrier(struct ck_epoch *global, struct ck_epoch_record *record)

 /*
  * It may be worth it to actually apply these deferral semantics to an epoch
- * that was observed at ck_epoch_call time. The problem is that the latter would
- * require a full fence.
+ * that was observed at ck_epoch_call time. The problem is that the latter
+ * would require a full fence.
  *
  * ck_epoch_call will dispatch to the latest epoch snapshot that was observed.
  * There are cases where it will fail to reclaim as early as it could. If this
@@ -402,7 +407,7 @@ ck_epoch_poll(struct ck_epoch *global, struct ck_epoch_record *record)
         unsigned int epoch = ck_pr_load_uint(&global->epoch);
         unsigned int snapshot;

-        /* Serialize record epoch snapshots with respect to global epoch load. */
+        /* Serialize epoch snapshots with respect to global epoch. */
         ck_pr_fence_memory();
         cr = ck_epoch_scan(global, cr, epoch, &active);
         if (cr != NULL) {
@@ -420,7 +425,8 @@ ck_epoch_poll(struct ck_epoch *global, struct ck_epoch_record *record)
         }

         /* If an active thread exists, rely on epoch observation. */
-        if (ck_pr_cas_uint_value(&global->epoch, epoch, epoch + 1, &snapshot) == false) {
+        if (ck_pr_cas_uint_value(&global->epoch, epoch, epoch + 1,
+            &snapshot) == false) {
                 record->epoch = snapshot;
         } else {
                 record->epoch = epoch + 1;