From 2686ca0223e8db422f9538ee9ed4fb77beabd2b1 Mon Sep 17 00:00:00 2001
From: Samy Al Bahra
Date: Tue, 29 Dec 2015 19:11:51 -0500
Subject: [PATCH] ck_epoch: Bug fixes and performance improvements.

- ck_epoch_begin: Disallow early load of epoch as it leads to measurable
  performance degradation in some benchmarks.
- ck_epoch_synchronize: Enforce barrier semantics.
---
 include/ck_epoch.h | 16 ++++++++++++----
 src/ck_epoch.c     | 29 +++++++++++++++++++----------
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/include/ck_epoch.h b/include/ck_epoch.h
index b492288..e7ce5bc 100644
--- a/include/ck_epoch.h
+++ b/include/ck_epoch.h
@@ -125,22 +125,30 @@ ck_epoch_begin(ck_epoch_record_t *record, ck_epoch_section_t *section)
 	 * section.
 	 */
 	if (record->active == 0) {
-		unsigned int g_epoch = ck_pr_load_uint(&epoch->epoch);
+		unsigned int g_epoch;
 
 		/*
 		 * It is possible for loads to be re-ordered before the store
 		 * is committed into the caller's epoch and active fields.
 		 * For this reason, store to load serialization is necessary.
 		 */
-		ck_pr_store_uint(&record->epoch, g_epoch);
-
 #if defined(CK_MD_TSO)
 		ck_pr_fas_uint(&record->active, 1);
 		ck_pr_fence_atomic_load();
 #else
 		ck_pr_store_uint(&record->active, 1);
-		ck_pr_fence_store_load();
+		ck_pr_fence_memory();
 #endif
+
+		/*
+		 * This load is allowed to be re-ordered prior to setting
+		 * active flag due to monotonic nature of the global epoch.
+		 * However, stale values lead to measurable performance
+		 * degradation in some torture tests so we disallow early load
+		 * of global epoch.
+		 */
+		g_epoch = ck_pr_load_uint(&epoch->epoch);
+		ck_pr_store_uint(&record->epoch, g_epoch);
 	} else {
 		ck_pr_store_uint(&record->active, record->active + 1);
 	}
diff --git a/src/ck_epoch.c b/src/ck_epoch.c
index d9b99d2..7e7df2d 100644
--- a/src/ck_epoch.c
+++ b/src/ck_epoch.c
@@ -392,10 +392,9 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
 	bool active;
 
 	/*
-	 * Technically, we are vulnerable to an overflow in presence of
-	 * multiple writers. Realistically, this will require UINT_MAX scans.
-	 * You can use epoch-protected sections on the writer-side if this is a
-	 * concern.
+	 * If UINT_MAX concurrent mutations were to occur then
+	 * it is possible to encounter an ABA-issue. If this is a concern,
+	 * consider tuning write-side concurrency.
 	 */
 	delta = epoch = ck_pr_load_uint(&global->epoch);
 	goal = epoch + CK_EPOCH_GRACE;
@@ -408,9 +407,11 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
 	ck_pr_fence_memory();
 
 	for (i = 0, cr = NULL; i < CK_EPOCH_GRACE - 1; cr = NULL, i++) {
+		bool r;
+
 		/*
 		 * Determine whether all threads have observed the current
-		 * epoch. We can get away without a fence here.
+		 * epoch with respect to the updates on invocation.
 		 */
 		while (cr = ck_epoch_scan(global, cr, delta, &active),
 		    cr != NULL) {
@@ -447,11 +448,18 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
 		 * it is possible to overflow the epoch value if we apply
 		 * modulo-3 arithmetic.
 		 */
-		if (ck_pr_cas_uint_value(&global->epoch, delta, delta + 1,
-		    &delta) == true) {
-			delta = delta + 1;
-			continue;
-		}
+		r = ck_pr_cas_uint_value(&global->epoch, delta, delta + 1,
+		    &delta);
+
+		/* Order subsequent thread active checks. */
+		ck_pr_fence_atomic_load();
+
+		/*
+		 * If CAS has succeeded, then set delta to latest snapshot.
+		 * Otherwise, we have just acquired latest snapshot.
+		 */
+		delta = delta + r;
+		continue;
 
 reload:
 		if ((goal > epoch) & (delta >= goal)) {
@@ -467,6 +475,7 @@ reload:
 		}
 	}
 
+	ck_pr_fence_release();
 	record->epoch = delta;
 	return;
 }
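
For context, a minimal usage sketch of the API touched by this patch follows. It is
illustrative only, not part of the commit, and assumes the interfaces as declared in
this revision (ck_epoch_begin/ck_epoch_end take a record and a section pointer,
ck_epoch_synchronize takes only a record). Per-thread record registration and setup
are elided, and exact signatures may differ in other releases.

/*
 * Illustrative sketch under the assumptions above; the per-thread record
 * is assumed to have been registered elsewhere (e.g. via ck_epoch_register).
 */
#include <ck_epoch.h>
#include <stdlib.h>

static ck_epoch_record_t *record;	/* per-thread record, already registered */

/* Read side: objects observed inside the section are not reclaimed. */
static void
reader(void)
{
	ck_epoch_section_t section;

	ck_epoch_begin(record, &section);
	/* ... traverse the epoch-protected structure ... */
	ck_epoch_end(record, &section);
}

/* Write side: wait out a grace period before freeing an unlinked object. */
static void
writer(void *unlinked_object)
{

	/*
	 * With this patch, ck_epoch_synchronize enforces barrier semantics:
	 * stores made before the call are visible to the readers it scans,
	 * and the reclamation below is not re-ordered ahead of grace-period
	 * detection.
	 */
	ck_epoch_synchronize(record);
	free(unlinked_object);
}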