ck_epoch: Bug fixes and performance improvements.

- ck_epoch_begin: Disallow early load of epoch as it leads to measurable
  performance degradation in some benchmarks.
- ck_epoch_synchronize: Enforce barrier semantics.
Samy Al Bahra 9 years ago
parent 87cdb9ea19
commit 2686ca0223
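For context on the ck_epoch_begin change below: the read-side record has to be published as active before the global epoch is sampled, which is a store-to-load ordering requirement. The following sketch illustrates that pattern in isolation using ck_pr primitives; the variable names (active, global_epoch) and the begin_like() helper are illustrative only and are not part of ck.

#include <ck_pr.h>

/* Illustrative stand-ins for record->active and the global epoch counter. */
static unsigned int active;
static unsigned int global_epoch;

static unsigned int
begin_like(void)
{
	/* Publish participation first. */
	ck_pr_store_uint(&active, 1);

	/*
	 * Serialize the store against the load that follows. On TSO targets
	 * the patch instead uses an atomic ck_pr_fas_uint() followed by
	 * ck_pr_fence_atomic_load(), which is cheaper than a full fence.
	 */
	ck_pr_fence_memory();

	/*
	 * Sample the global epoch only after the active store is visible,
	 * so the recorded snapshot cannot be stale with respect to writers
	 * that already observe this thread as active.
	 */
	return ck_pr_load_uint(&global_epoch);
}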

@@ -125,22 +125,30 @@ ck_epoch_begin(ck_epoch_record_t *record, ck_epoch_section_t *section)
 	 * section.
 	 */
 	if (record->active == 0) {
-		unsigned int g_epoch = ck_pr_load_uint(&epoch->epoch);
+		unsigned int g_epoch;
 
 		/*
 		 * It is possible for loads to be re-ordered before the store
 		 * is committed into the caller's epoch and active fields.
 		 * For this reason, store to load serialization is necessary.
 		 */
-		ck_pr_store_uint(&record->epoch, g_epoch);
 #if defined(CK_MD_TSO)
 		ck_pr_fas_uint(&record->active, 1);
 		ck_pr_fence_atomic_load();
 #else
 		ck_pr_store_uint(&record->active, 1);
-		ck_pr_fence_store_load();
+		ck_pr_fence_memory();
 #endif
+
+		/*
+		 * This load is allowed to be re-ordered prior to setting
+		 * active flag due to monotonic nature of the global epoch.
+		 * However, stale values lead to measurable performance
+		 * degradation in some torture tests so we disallow early load
+		 * of global epoch.
+		 */
+		g_epoch = ck_pr_load_uint(&epoch->epoch);
+		ck_pr_store_uint(&record->epoch, g_epoch);
 	} else {
 		ck_pr_store_uint(&record->active, record->active + 1);
 	}

@@ -392,10 +392,9 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
 	bool active;
 
 	/*
-	 * Technically, we are vulnerable to an overflow in presence of
-	 * multiple writers. Realistically, this will require UINT_MAX scans.
-	 * You can use epoch-protected sections on the writer-side if this is a
-	 * concern.
+	 * If UINT_MAX concurrent mutations were to occur then
+	 * it is possible to encounter an ABA-issue. If this is a concern,
+	 * consider tuning write-side concurrency.
 	 */
 	delta = epoch = ck_pr_load_uint(&global->epoch);
 	goal = epoch + CK_EPOCH_GRACE;
@@ -408,9 +407,11 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
 	ck_pr_fence_memory();
 
 	for (i = 0, cr = NULL; i < CK_EPOCH_GRACE - 1; cr = NULL, i++) {
+		bool r;
+
 		/*
 		 * Determine whether all threads have observed the current
-		 * epoch. We can get away without a fence here.
+		 * epoch with respect to the updates on invocation.
 		 */
 		while (cr = ck_epoch_scan(global, cr, delta, &active),
 		    cr != NULL) {
@@ -447,11 +448,18 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
 		 * it is possible to overflow the epoch value if we apply
 		 * modulo-3 arithmetic.
 		 */
-		if (ck_pr_cas_uint_value(&global->epoch, delta, delta + 1,
-		    &delta) == true) {
-			delta = delta + 1;
-			continue;
-		}
+		r = ck_pr_cas_uint_value(&global->epoch, delta, delta + 1,
+		    &delta);
+
+		/* Order subsequent thread active checks. */
+		ck_pr_fence_atomic_load();
+
+		/*
+		 * If CAS has succeeded, then set delta to latest snapshot.
+		 * Otherwise, we have just acquired latest snapshot.
+		 */
+		delta = delta + r;
+		continue;
 
 reload:
 		if ((goal > epoch) & (delta >= goal)) {
@@ -467,6 +475,7 @@ reload:
 		}
 	}
 
+	ck_pr_fence_release();
 	record->epoch = delta;
 	return;
 }
