ck_epoch: Bug fixes and performance improvements.

- ck_epoch_begin: Disallow early load of epoch as it leads to measurable
  performance degradation in some benchmarks.
- ck_epoch_synchronize: Enforce barrier semantics.
Samy Al Bahra 9 years ago
parent 87cdb9ea19
commit 2686ca0223
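
For context, a minimal usage sketch of the two entry points this commit touches. It is illustrative only: the reader/writer split, struct node, consume() and the NULL section argument are assumptions made for the example, and the record is assumed to have already been registered against the global epoch object.

#include <ck_epoch.h>
#include <stddef.h>
#include <stdlib.h>

struct node;
void consume(struct node *);		/* Hypothetical read-only traversal. */

static ck_epoch_record_t record;	/* Per-thread record, registered elsewhere. */

static void
reader(struct node *n)
{

	/* Read-side critical section: marks the record active. */
	ck_epoch_begin(&record, NULL);
	consume(n);
	ck_epoch_end(&record, NULL);
}

static void
writer(struct node *garbage)
{

	/*
	 * Waits for a grace period; per the second bullet above, the call
	 * now also provides barrier semantics on return.
	 */
	ck_epoch_synchronize(&record);
	free(garbage);
}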

@@ -125,22 +125,30 @@ ck_epoch_begin(ck_epoch_record_t *record, ck_epoch_section_t *section)
 	 * section.
 	 */
 	if (record->active == 0) {
-		unsigned int g_epoch = ck_pr_load_uint(&epoch->epoch);
+		unsigned int g_epoch;
 
 		/*
 		 * It is possible for loads to be re-ordered before the store
 		 * is committed into the caller's epoch and active fields.
 		 * For this reason, store to load serialization is necessary.
 		 */
-		ck_pr_store_uint(&record->epoch, g_epoch);
-
 #if defined(CK_MD_TSO)
 		ck_pr_fas_uint(&record->active, 1);
 		ck_pr_fence_atomic_load();
 #else
 		ck_pr_store_uint(&record->active, 1);
-		ck_pr_fence_store_load();
+		ck_pr_fence_memory();
 #endif
+
+		/*
+		 * This load is allowed to be re-ordered prior to setting
+		 * active flag due to monotonic nature of the global epoch.
+		 * However, stale values lead to measurable performance
+		 * degradation in some torture tests so we disallow early load
+		 * of global epoch.
+		 */
+		g_epoch = ck_pr_load_uint(&epoch->epoch);
+		ck_pr_store_uint(&record->epoch, g_epoch);
 	} else {
 		ck_pr_store_uint(&record->active, record->active + 1);
 	}
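
Condensed, the non-nested fast path now reads as follows (assembled from the + side of the hunk above; the explanatory comments are added here and are not part of the commit). The record is published as active and store-to-load ordering is enforced before the global epoch is sampled; an early, stale load would still be correct because the global epoch only moves forward, but it measurably degrades some torture tests.

	if (record->active == 0) {
		unsigned int g_epoch;

#if defined(CK_MD_TSO)
		/*
		 * On TSO, an atomic read-modify-write followed by an
		 * atomic-to-load fence provides the store-to-load
		 * serialization called for above.
		 */
		ck_pr_fas_uint(&record->active, 1);
		ck_pr_fence_atomic_load();
#else
		ck_pr_store_uint(&record->active, 1);
		ck_pr_fence_memory();
#endif

		/* Only now sample the global epoch and publish it. */
		g_epoch = ck_pr_load_uint(&epoch->epoch);
		ck_pr_store_uint(&record->epoch, g_epoch);
	}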

@@ -392,10 +392,9 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
 	bool active;
 
 	/*
-	 * Technically, we are vulnerable to an overflow in presence of
-	 * multiple writers. Realistically, this will require UINT_MAX scans.
-	 * You can use epoch-protected sections on the writer-side if this is a
-	 * concern.
+	 * If UINT_MAX concurrent mutations were to occur then
+	 * it is possible to encounter an ABA-issue. If this is a concern,
+	 * consider tuning write-side concurrency.
 	 */
 	delta = epoch = ck_pr_load_uint(&global->epoch);
 	goal = epoch + CK_EPOCH_GRACE;
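
A back-of-the-envelope illustration of the rewritten comment, purely for intuition (the counter here stands in for the unsigned global epoch; the names are made up):

#include <limits.h>
#include <stdio.h>

int
main(void)
{
	unsigned int stale = 42;	/* Epoch snapshot taken earlier. */
	unsigned int global = stale;

	/*
	 * UINT_MAX + 1 increments wrap an unsigned int back to its starting
	 * value, at which point the stale snapshot and the current epoch
	 * are indistinguishable again.
	 */
	global += UINT_MAX;
	global += 1;

	printf("stale=%u global=%u\n", stale, global);	/* Equal values. */
	return 0;
}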
@@ -408,9 +407,11 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
 	ck_pr_fence_memory();
 
 	for (i = 0, cr = NULL; i < CK_EPOCH_GRACE - 1; cr = NULL, i++) {
+		bool r;
+
 		/*
 		 * Determine whether all threads have observed the current
-		 * epoch. We can get away without a fence here.
+		 * epoch with respect to the updates on invocation.
 		 */
 		while (cr = ck_epoch_scan(global, cr, delta, &active),
 		    cr != NULL) {
@@ -447,11 +448,18 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
 		 * it is possible to overflow the epoch value if we apply
 		 * modulo-3 arithmetic.
 		 */
-		if (ck_pr_cas_uint_value(&global->epoch, delta, delta + 1,
-		    &delta) == true) {
-			delta = delta + 1;
-			continue;
-		}
+		r = ck_pr_cas_uint_value(&global->epoch, delta, delta + 1,
+		    &delta);
+
+		/* Order subsequent thread active checks. */
+		ck_pr_fence_atomic_load();
+
+		/*
+		 * If CAS has succeeded, then set delta to latest snapshot.
+		 * Otherwise, we have just acquired latest snapshot.
+		 */
+		delta = delta + r;
+		continue;
 
 reload:
 	if ((goal > epoch) & (delta >= goal)) {
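
The branch-free delta = delta + r update relies on ck_pr_cas_uint_value() returning a bool and writing the value it observed into its final argument. Below is a small self-contained sketch of the same idiom; epoch_advance() and its parameters are hypothetical and not part of ck.

#include <ck_pr.h>
#include <stdbool.h>

/*
 * Try to advance a shared counter past `snapshot` and return the newest
 * value we are now aware of, mirroring the loop body above.
 */
static unsigned int
epoch_advance(unsigned int *counter, unsigned int snapshot)
{
	bool r;

	/*
	 * On success the observed value equals `snapshot`, so step it by one
	 * ourselves; on failure ck_pr_cas_uint_value() has already written
	 * the newer value it saw into &snapshot.
	 */
	r = ck_pr_cas_uint_value(counter, snapshot, snapshot + 1, &snapshot);
	return snapshot + (unsigned int)r;
}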
@@ -467,6 +475,7 @@ reload:
 		}
 	}
 
+	ck_pr_fence_release();
 	record->epoch = delta;
 	return;
 }
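
For reference, the epilogue now reads as below (from the + side above); the comments are added here as an interpretation, not part of the commit. The release fence keeps the store that publishes record->epoch from being reordered ahead of the preceding scans, which appears to serve the "enforce barrier semantics" item in the commit message.

	/*
	 * Every record has been observed in the target epoch by this point;
	 * order that observation before publishing our own snapshot.
	 */
	ck_pr_fence_release();
	record->epoch = delta;
	return;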
