From 9732e2bdb3f50fb6611812f9b7c68fe4d206d797 Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Fri, 18 Apr 2014 18:55:48 +0000 Subject: [PATCH 01/31] ck_swlock: A single writer rwlock. This lock is copy-safe when the latch operations are used. Simplified write side operations lead to lower latencies than ck_rwlock for single writer workloads. --- .gitignore | 17 + include/ck_swlock.h | 374 +++++++++++++++++++ regressions/Makefile | 5 + regressions/ck_swlock/benchmark/Makefile | 17 + regressions/ck_swlock/benchmark/latency.c | 100 +++++ regressions/ck_swlock/benchmark/throughput.c | 249 ++++++++++++ regressions/ck_swlock/validate/Makefile | 17 + regressions/ck_swlock/validate/validate.c | 239 ++++++++++++ 8 files changed, 1018 insertions(+) create mode 100644 include/ck_swlock.h create mode 100644 regressions/ck_swlock/benchmark/Makefile create mode 100644 regressions/ck_swlock/benchmark/latency.c create mode 100644 regressions/ck_swlock/benchmark/throughput.c create mode 100644 regressions/ck_swlock/validate/Makefile create mode 100644 regressions/ck_swlock/validate/validate.c diff --git a/.gitignore b/.gitignore index f9e9158..58ab781 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,10 @@ build/Makefile *.so *.dSYM .*.sw[op] +GPATH +GRTAGS +GTAGS +ID regressions/ck_array/validate/serial regressions/ck_cohort/benchmark/ck_cohort.LATENCY regressions/ck_cohort/benchmark/ck_cohort.THROUGHPUT @@ -156,3 +160,16 @@ regressions/ck_rwcohort/benchmark/ck_rp.LATENCY regressions/ck_rwcohort/benchmark/ck_rp.THROUGHPUT regressions/ck_rwcohort/benchmark/ck_wp.LATENCY regressions/ck_rwcohort/benchmark/ck_wp.THROUGHPUT +regressions/ck_hs/benchmark/parallel_bytestring.delete +regressions/ck_ht/benchmark/parallel_bytestring.delete +regressions/ck_ht/benchmark/serial.delete +regressions/ck_ht/validate/serial.delete +regressions/ck_rhs/benchmark/parallel_bytestring +regressions/ck_rhs/benchmark/serial +regressions/ck_rhs/validate/serial +regressions/ck_spinlock/benchmark/ck_hclh.LATENCY +regressions/ck_spinlock/benchmark/ck_hclh.THROUGHPUT +regressions/ck_spinlock/validate/ck_hclh +regressions/ck_swlock/benchmark/latency +regressions/ck_swlock/benchmark/throughput +regressions/ck_swlock/validate/validate diff --git a/include/ck_swlock.h b/include/ck_swlock.h new file mode 100644 index 0000000..92fd16e --- /dev/null +++ b/include/ck_swlock.h @@ -0,0 +1,374 @@ +/* + * Copyright 2011-2014 Samy Al Bahra. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _CK_SWLOCK_H +#define _CK_SWLOCK_H + +#include +#include +#include +#include + +struct ck_swlock { + uint32_t writer; + uint32_t n_readers; +}; +typedef struct ck_swlock ck_swlock_t; + +#define CK_SWLOCK_INITIALIZER {0, 0} +#define CK_SWLOCK_LATCH_BIT (1 << 31) +#define CK_SWLOCK_READER_BITS (UINT32_MAX ^ CK_SWLOCK_LATCH_BIT) + +CK_CC_INLINE static void +ck_swlock_init(struct ck_swlock *rw) +{ + + rw->writer = 0; + rw->n_readers = 0; + ck_pr_fence_store(); + return; +} + +CK_CC_INLINE static void +ck_swlock_write_unlock(ck_swlock_t *rw) +{ + + ck_pr_store_32(&rw->writer, 0); + return; +} + +CK_CC_INLINE static bool +ck_swlock_locked_writer(ck_swlock_t *rw) +{ + + return ck_pr_load_32(&rw->writer); +} + +CK_CC_INLINE static void +ck_swlock_write_downgrade(ck_swlock_t *rw) +{ + + ck_pr_inc_32(&rw->n_readers); + ck_swlock_write_unlock(rw); + return; +} + +CK_CC_INLINE static bool +ck_swlock_locked(ck_swlock_t *rw) +{ + uint32_t r; + + r = ck_pr_load_32(&rw->writer); + + return ck_pr_load_32(&rw->n_readers) | r; +} + +CK_CC_INLINE static bool +ck_swlock_write_trylock(ck_swlock_t *rw) +{ + + ck_pr_store_32(&rw->writer, 1); + + ck_pr_fence_atomic_load(); + + if (ck_pr_load_32(&rw->n_readers) != 0) { + ck_swlock_write_unlock(rw); + return false; + } + + return true; +} + +CK_ELIDE_TRYLOCK_PROTOTYPE(ck_swlock_write, ck_swlock_t, + ck_swlock_locked, ck_swlock_write_trylock) + +CK_CC_INLINE static void +ck_swlock_write_lock(ck_swlock_t *rw) +{ + + ck_pr_store_32(&rw->writer, 1); + + ck_pr_fence_atomic_load(); + + while (ck_pr_load_32(&rw->n_readers) != 0) + ck_pr_stall(); + + return; +} + +CK_CC_INLINE static void +ck_swlock_write_latch(ck_swlock_t *rw) +{ + + ck_pr_store_32(&rw->writer, 1); + + ck_pr_fence_atomic_load(); + + while (ck_pr_cas_32(&rw->n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) { + /* Stall until readers have seen the latch and cleared */ + ck_pr_stall(); + } + + return; +} + +CK_CC_INLINE static void +ck_swlock_write_unlatch(ck_swlock_t *rw) +{ + + ck_pr_store_32(&rw->n_readers, 0); + + ck_swlock_write_unlock(rw); + + return; +} + +CK_ELIDE_PROTOTYPE(ck_swlock_write, ck_swlock_t, + ck_swlock_locked, ck_swlock_write_lock, + ck_swlock_locked_writer, ck_swlock_write_unlock) + +CK_CC_INLINE static bool +ck_swlock_read_trylock(ck_swlock_t *rw) +{ + + if (ck_pr_load_32(&rw->writer) != 0) + return false; + + if (ck_pr_faa_32(&rw->n_readers, 1) & CK_SWLOCK_LATCH_BIT) { + return false; + } + + /* + * Serialize with respect to concurrent write + * lock operation. + */ + ck_pr_fence_atomic_load(); + + if (ck_pr_load_32(&rw->writer) == 0) { + ck_pr_fence_load(); + return true; + } + + ck_pr_dec_32(&rw->n_readers); + return false; +} + +CK_ELIDE_TRYLOCK_PROTOTYPE(ck_swlock_read, ck_swlock_t, + ck_swlock_locked_writer, ck_swlock_read_trylock) + +CK_CC_INLINE static void +ck_swlock_read_lock(ck_swlock_t *rw) +{ + + for (;;) { + while (ck_pr_load_32(&rw->writer) != 0) + ck_pr_stall(); + + ck_pr_inc_32(&rw->n_readers); + + /* + * Serialize with respect to concurrent write + * lock operation. + */ + ck_pr_fence_atomic_load(); + + if (ck_pr_load_32(&rw->writer) == 0) + break; + + ck_pr_dec_32(&rw->n_readers); + } + + /* Acquire semantics are necessary. */ + ck_pr_fence_load(); + return; +} + +CK_CC_INLINE static void +ck_swlock_read_latchlock(ck_swlock_t *rw) +{ + + for (;;) { + + while (ck_pr_load_32(&rw->writer) != 0) + ck_pr_stall(); + + if (ck_pr_faa_32(&rw->n_readers, 1) & CK_SWLOCK_LATCH_BIT) { + /* Writer has latched, stall the reader */ + continue; + } + + /* + * Serialize with respect to concurrent write + * lock operation. + */ + ck_pr_fence_atomic_load(); + + if (ck_pr_load_32(&rw->writer) == 0) + break; + + ck_pr_dec_32(&rw->n_readers); + } + + /* Acquire semantics are necessary. */ + ck_pr_fence_load(); + return; +} + + +CK_CC_INLINE static bool +ck_swlock_locked_reader(ck_swlock_t *rw) +{ + + ck_pr_fence_load(); + return (ck_pr_load_32(&rw->n_readers) & CK_SWLOCK_READER_BITS); +} + +CK_CC_INLINE static void +ck_swlock_read_unlock(ck_swlock_t *rw) +{ + + ck_pr_fence_load_atomic(); + ck_pr_dec_32(&rw->n_readers); + return; +} + +CK_ELIDE_PROTOTYPE(ck_swlock_read, ck_swlock_t, + ck_swlock_locked_writer, ck_swlock_read_lock, + ck_swlock_locked_reader, ck_swlock_read_unlock) + +/* + * Recursive writer reader-writer lock implementation. + */ +struct ck_swlock_recursive { + struct ck_swlock rw; + uint32_t wc; +}; +typedef struct ck_swlock_recursive ck_swlock_recursive_t; + +#define CK_SWLOCK_RECURSIVE_INITIALIZER {CK_SWLOCK_INITIALIZER, 0} + +CK_CC_INLINE static void +ck_swlock_recursive_write_lock(ck_swlock_recursive_t *rw) +{ + + ck_pr_store_32(&rw->rw.writer, 1); + + ck_pr_fence_atomic_load(); + + while (ck_pr_load_32(&rw->rw.n_readers) != 0) + ck_pr_stall(); + + rw->wc++; + return; +} + +CK_CC_INLINE static void +ck_swlock_recursive_write_latch(ck_swlock_recursive_t *rw) +{ + ck_pr_store_32(&rw->rw.writer, 1); + + ck_pr_fence_atomic_load(); + + while (ck_pr_cas_32(&rw->rw.n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) + ck_pr_stall(); + + rw->wc++; + return; +} + +CK_CC_INLINE static bool +ck_swlock_recursive_write_trylock(ck_swlock_recursive_t *rw) +{ + + ck_pr_store_32(&rw->rw.writer, 1); + + ck_pr_fence_atomic_load(); + + if (ck_pr_load_32(&rw->rw.n_readers) != 0) { + ck_pr_store_32(&rw->rw.writer, 0); + return false; + } + + rw->wc++; + return true; +} + +CK_CC_INLINE static void +ck_swlock_recursive_write_unlock(ck_swlock_recursive_t *rw) +{ + + if (--rw->wc == 0) { + ck_pr_fence_release(); + ck_pr_store_32(&rw->rw.writer, 0); + } + + return; +} + +CK_CC_INLINE static void +ck_swlock_recursive_write_unlatch(ck_swlock_recursive_t *rw) +{ + ck_pr_store_32(&rw->rw.n_readers, 0); + + ck_swlock_recursive_write_unlock(rw); + + return; +} + + +CK_CC_INLINE static void +ck_swlock_recursive_read_lock(ck_swlock_recursive_t *rw) +{ + + ck_swlock_read_lock(&rw->rw); + return; +} + +CK_CC_INLINE static void +ck_swlock_recursive_read_latchlock(ck_swlock_recursive_t *rw) +{ + + ck_swlock_read_latchlock(&rw->rw); + return; +} + +CK_CC_INLINE static bool +ck_swlock_recursive_read_trylock(ck_swlock_recursive_t *rw) +{ + + return ck_swlock_read_trylock(&rw->rw); +} + +CK_CC_INLINE static void +ck_swlock_recursive_read_unlock(ck_swlock_recursive_t *rw) +{ + + ck_swlock_read_unlock(&rw->rw); + return; +} + +#endif /* _CK_SWLOCK_H */ + diff --git a/regressions/Makefile b/regressions/Makefile index 852efb8..e666d7a 100644 --- a/regressions/Makefile +++ b/regressions/Makefile @@ -16,6 +16,7 @@ DIR=array \ queue \ ring \ rwlock \ + swlock \ sequence \ spinlock \ stack @@ -58,6 +59,8 @@ all: $(MAKE) -C ./ck_ring/benchmark all $(MAKE) -C ./ck_rwlock/validate all $(MAKE) -C ./ck_rwlock/benchmark all + $(MAKE) -C ./ck_swlock/validate all + $(MAKE) -C ./ck_swlock/benchmark all $(MAKE) -C ./ck_pflock/validate all $(MAKE) -C ./ck_pflock/benchmark all $(MAKE) -C ./ck_hp/validate all @@ -101,6 +104,8 @@ clean: $(MAKE) -C ./ck_ring/benchmark clean $(MAKE) -C ./ck_rwlock/validate clean $(MAKE) -C ./ck_rwlock/benchmark clean + $(MAKE) -C ./ck_swlock/validate clean + $(MAKE) -C ./ck_swlock/benchmark clean $(MAKE) -C ./ck_pflock/validate clean $(MAKE) -C ./ck_pflock/benchmark clean $(MAKE) -C ./ck_hp/validate clean diff --git a/regressions/ck_swlock/benchmark/Makefile b/regressions/ck_swlock/benchmark/Makefile new file mode 100644 index 0000000..4ec728c --- /dev/null +++ b/regressions/ck_swlock/benchmark/Makefile @@ -0,0 +1,17 @@ +.PHONY: clean distribution + +OBJECTS=latency throughput + +all: $(OBJECTS) + +latency: latency.c ../../../include/ck_swlock.h + $(CC) $(CFLAGS) -o latency latency.c + +throughput: throughput.c ../../../include/ck_swlock.h + $(CC) $(CFLAGS) -o throughput throughput.c + +clean: + rm -rf *.dSYM *.exe *~ *.o $(OBJECTS) + +include ../../../build/regressions.build +CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE diff --git a/regressions/ck_swlock/benchmark/latency.c b/regressions/ck_swlock/benchmark/latency.c new file mode 100644 index 0000000..aeb3e8a --- /dev/null +++ b/regressions/ck_swlock/benchmark/latency.c @@ -0,0 +1,100 @@ +/* + * Copyright 2011-2014 Samy Al Bahra. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + +#include "../../common.h" + +#define CK_F_PR_RTM + +#ifndef STEPS +#define STEPS 2000000 +#endif + +int +main(void) +{ + uint64_t s_b, e_b, i; + ck_swlock_t swlock = CK_SWLOCK_INITIALIZER; + + for (i = 0; i < STEPS; i++) { + ck_swlock_write_lock(&swlock); + ck_swlock_write_unlock(&swlock); + } + + s_b = rdtsc(); + for (i = 0; i < STEPS; i++) { + ck_swlock_write_lock(&swlock); + ck_swlock_write_unlock(&swlock); + } + e_b = rdtsc(); + printf(" WRITE: swlock %15" PRIu64 "\n", (e_b - s_b) / STEPS); + + for (i = 0; i < STEPS; i++) { + ck_swlock_read_lock(&swlock); + ck_swlock_read_unlock(&swlock); + } + + s_b = rdtsc(); + for (i = 0; i < STEPS; i++) { + ck_swlock_read_lock(&swlock); + ck_swlock_read_unlock(&swlock); + } + e_b = rdtsc(); + printf(" READ: swlock %15" PRIu64 "\n", (e_b - s_b) / STEPS); + + for (i = 0; i < STEPS; i++) { + ck_swlock_write_latch(&swlock); + ck_swlock_write_unlatch(&swlock); + } + + s_b = rdtsc(); + for (i = 0; i < STEPS; i++) { + ck_swlock_write_latch(&swlock); + ck_swlock_write_unlatch(&swlock); + } + e_b = rdtsc(); + printf(" LATCH: swlock %15" PRIu64 "\n", (e_b - s_b) / STEPS); + + for (i = 0; i < STEPS; i++) { + ck_swlock_read_latchlock(&swlock); + ck_swlock_read_unlock(&swlock); + } + + s_b = rdtsc(); + for (i = 0; i < STEPS; i++) { + ck_swlock_read_latchlock(&swlock); + ck_swlock_read_unlock(&swlock); + } + e_b = rdtsc(); + printf(" READ_LATCHLOCK: swlock %15" PRIu64 "\n", (e_b - s_b) / STEPS); + + + return 0; +} + diff --git a/regressions/ck_swlock/benchmark/throughput.c b/regressions/ck_swlock/benchmark/throughput.c new file mode 100644 index 0000000..ba6bfd2 --- /dev/null +++ b/regressions/ck_swlock/benchmark/throughput.c @@ -0,0 +1,249 @@ +/* + * Copyright 2011-2014 Samy Al Bahra. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "../../common.h" + +#ifndef STEPS +#define STEPS 1000000 +#endif + +static int barrier; +static int threads; +static unsigned int flag CK_CC_CACHELINE; +static struct { + ck_swlock_t lock; +} rw CK_CC_CACHELINE = { + .lock = CK_SWLOCK_INITIALIZER +}; + +static struct affinity affinity; + +static void * +thread_lock(void *pun) +{ + uint64_t s_b, e_b, a, i; + uint64_t *value = pun; + + if (aff_iterate(&affinity) != 0) { + perror("ERROR: Could not affine thread"); + exit(EXIT_FAILURE); + } + + ck_pr_inc_int(&barrier); + while (ck_pr_load_int(&barrier) != threads) + ck_pr_stall(); + + for (i = 1, a = 0;; i++) { + s_b = rdtsc(); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_lock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + e_b = rdtsc(); + + a += (e_b - s_b) >> 4; + + if (ck_pr_load_uint(&flag) == 1) + break; + } + + ck_pr_inc_int(&barrier); + while (ck_pr_load_int(&barrier) != threads * 2) + ck_pr_stall(); + + *value = (a / i); + return NULL; +} + +static void * +thread_latchlock(void *pun) +{ + uint64_t s_b, e_b, a, i; + uint64_t *value = pun; + + if (aff_iterate(&affinity) != 0) { + perror("ERROR: Could not affine thread"); + exit(EXIT_FAILURE); + } + + ck_pr_inc_int(&barrier); + while (ck_pr_load_int(&barrier) != threads) + ck_pr_stall(); + + for (i = 1, a = 0;; i++) { + s_b = rdtsc(); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + ck_swlock_read_latchlock(&rw.lock); + ck_swlock_read_unlock(&rw.lock); + e_b = rdtsc(); + + a += (e_b - s_b) >> 4; + + if (ck_pr_load_uint(&flag) == 1) + break; + } + + ck_pr_inc_int(&barrier); + while (ck_pr_load_int(&barrier) != threads * 2) + ck_pr_stall(); + + *value = (a / i); + return NULL; +} + +static void +swlock_test(pthread_t *p, int d, uint64_t *latency, void *(*f)(void *), const char *label) +{ + int t; + + ck_pr_store_int(&barrier, 0); + ck_pr_store_uint(&flag, 0); + + affinity.delta = d; + affinity.request = 0; + + fprintf(stderr, "Creating threads (%s)...", label); + for (t = 0; t < threads; t++) { + if (pthread_create(&p[t], NULL, f, latency + t) != 0) { + ck_error("ERROR: Could not create thread %d\n", t); + } + } + fprintf(stderr, "done\n"); + + common_sleep(10); + ck_pr_store_uint(&flag, 1); + + fprintf(stderr, "Waiting for threads to finish acquisition regression..."); + for (t = 0; t < threads; t++) + pthread_join(p[t], NULL); + fprintf(stderr, "done\n\n"); + + for (t = 1; t <= threads; t++) + printf("%10u %20" PRIu64 "\n", t, latency[t - 1]); + + fprintf(stderr, "\n"); + return; +} + + +int +main(int argc, char *argv[]) +{ + int d; + pthread_t *p; + uint64_t *latency; + + if (argc != 3) { + ck_error("Usage: throughput \n"); + } + + threads = atoi(argv[2]); + if (threads <= 0) { + ck_error("ERROR: Threads must be a value > 0.\n"); + } + + p = malloc(sizeof(pthread_t) * threads); + if (p == NULL) { + ck_error("ERROR: Failed to initialize thread.\n"); + } + + latency = malloc(sizeof(uint64_t) * threads); + if (latency == NULL) { + ck_error("ERROR: Failed to create latency buffer.\n"); + } + + d = atoi(argv[1]); + swlock_test(p, d, latency, thread_lock, "swlock"); + swlock_test(p, d, latency, thread_latchlock, "swlock"); + + return 0; +} + diff --git a/regressions/ck_swlock/validate/Makefile b/regressions/ck_swlock/validate/Makefile new file mode 100644 index 0000000..a4f31fd --- /dev/null +++ b/regressions/ck_swlock/validate/Makefile @@ -0,0 +1,17 @@ +.PHONY: check clean distribution + +OBJECTS=validate + +all: $(OBJECTS) + +validate: validate.c ../../../include/ck_swlock.h + $(CC) $(CFLAGS) -o validate validate.c + +check: all + ./validate $(CORES) 1 + +clean: + rm -rf *.dSYM *.exe *~ *.o $(OBJECTS) + +include ../../../build/regressions.build +CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE -O0 diff --git a/regressions/ck_swlock/validate/validate.c b/regressions/ck_swlock/validate/validate.c new file mode 100644 index 0000000..8999236 --- /dev/null +++ b/regressions/ck_swlock/validate/validate.c @@ -0,0 +1,239 @@ +/* + * Copyright 2011-2014 Samy Al Bahra. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "../../common.h" + +#ifndef ITERATE +#define ITERATE 1000000 +#endif + +static struct affinity a; +static unsigned int locked; +static int nthr; +static ck_swlock_t lock = CK_SWLOCK_INITIALIZER; +static ck_swlock_recursive_t r_lock = CK_SWLOCK_RECURSIVE_INITIALIZER; + +static void * +thread_recursive(void *arg) +{ + int i = ITERATE; + unsigned int l; + unsigned int tid = *(int *) arg; + + if (aff_iterate(&a)) { + perror("ERROR: Could not affine thread"); + exit(EXIT_FAILURE); + } + + while (i--) { + if (tid == 0) { + /* Writer */ + while (ck_swlock_recursive_write_trylock(&r_lock) == false) + ck_pr_stall(); + + ck_swlock_recursive_write_lock(&r_lock); + ck_swlock_recursive_write_latch(&r_lock); + ck_swlock_recursive_write_lock(&r_lock); + + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } + + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + + l = ck_pr_load_uint(&locked); + if (l != 8) { + ck_error("ERROR [WR:%d]: %u != 2\n", __LINE__, l); + } + + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } + } + ck_swlock_recursive_write_unlock(&r_lock); + ck_swlock_recursive_write_unlatch(&r_lock); + ck_swlock_recursive_write_unlock(&r_lock); + ck_swlock_recursive_write_unlock(&r_lock); + } + + ck_swlock_recursive_read_latchlock(&r_lock); + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [RD:%d]: %u != 0\n", __LINE__, l); + } + } + ck_swlock_recursive_read_unlock(&r_lock); + } + + return (NULL); +} + + +static void * +thread(void *arg) +{ + unsigned int i = ITERATE; + unsigned int l; + int tid = *(int *) arg; + + if (aff_iterate(&a)) { + perror("ERROR: Could not affine thread"); + exit(EXIT_FAILURE); + } + + while (i--) { + if (tid == 0) { + /* Writer */ + ck_swlock_write_latch(&lock); + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } + + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + + l = ck_pr_load_uint(&locked); + if (l != 8) { + ck_error("ERROR [WR:%d]: %u != 2\n", __LINE__, l); + } + + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } + } + ck_swlock_write_unlatch(&lock); + } + + ck_swlock_read_latchlock(&lock); + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [RD:%d]: %u != 0 r:%x w:%d\n", __LINE__, l, lock.n_readers, lock.writer); + } + } + ck_swlock_read_unlock(&lock); + } + + return (NULL); +} + +static void +swlock_test(pthread_t *threads, void *(*f)(void *), const char *test) +{ + int i; + + fprintf(stderr, "Creating threads (%s)...", test); + for (i = 0; i < nthr; i++) { + if (pthread_create(&threads[i], NULL, f, &i)) { + ck_error("ERROR: Could not create thread %d\n", i); + } + } + fprintf(stderr, "."); + + for (i = 0; i < nthr; i++) + pthread_join(threads[i], NULL); + fprintf(stderr, "done (passed)\n"); + return; +} + +int +main(int argc, char *argv[]) +{ + pthread_t *threads; + + if (argc != 3) { + ck_error("Usage: validate \n"); + } + + nthr = atoi(argv[1]); + if (nthr <= 0) { + ck_error("ERROR: Number of threads must be greater than 0\n"); + } + + threads = malloc(sizeof(pthread_t) * nthr); + if (threads == NULL) { + ck_error("ERROR: Could not allocate thread structures\n"); + } + + a.delta = atoi(argv[2]); + + swlock_test(threads, thread, "regular"); + swlock_test(threads, thread_recursive, "recursive"); + return 0; +} + From 82f33b3fe96a527921a605e8963f3a550501db11 Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Fri, 18 Apr 2014 15:31:22 -0400 Subject: [PATCH 02/31] ck_swlock: First round fixes for ck_swlock. - Add necessary memory barriers. - Style conformance. --- include/ck_swlock.h | 58 ++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 92fd16e..ceafe85 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -1,5 +1,6 @@ /* - * Copyright 2011-2014 Samy Al Bahra. + * Copyright 2014 Samy Al Bahra. + * Copyright 2014 Jaidev Sridhar. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,6 +29,7 @@ #define _CK_SWLOCK_H #include +#include #include #include #include @@ -39,7 +41,7 @@ struct ck_swlock { typedef struct ck_swlock ck_swlock_t; #define CK_SWLOCK_INITIALIZER {0, 0} -#define CK_SWLOCK_LATCH_BIT (1 << 31) +#define CK_SWLOCK_LATCH_BIT (1UL << 31) #define CK_SWLOCK_READER_BITS (UINT32_MAX ^ CK_SWLOCK_LATCH_BIT) CK_CC_INLINE static void @@ -48,7 +50,6 @@ ck_swlock_init(struct ck_swlock *rw) rw->writer = 0; rw->n_readers = 0; - ck_pr_fence_store(); return; } @@ -56,6 +57,7 @@ CK_CC_INLINE static void ck_swlock_write_unlock(ck_swlock_t *rw) { + ck_pr_fence_release(); ck_pr_store_32(&rw->writer, 0); return; } @@ -64,6 +66,7 @@ CK_CC_INLINE static bool ck_swlock_locked_writer(ck_swlock_t *rw) { + ck_pr_fence_load(); return ck_pr_load_32(&rw->writer); } @@ -79,11 +82,8 @@ ck_swlock_write_downgrade(ck_swlock_t *rw) CK_CC_INLINE static bool ck_swlock_locked(ck_swlock_t *rw) { - uint32_t r; - - r = ck_pr_load_32(&rw->writer); - return ck_pr_load_32(&rw->n_readers) | r; + return ck_pr_load_32(&rw->n_readers) | ck_pr_load_32(&rw->writer); } CK_CC_INLINE static bool @@ -127,10 +127,9 @@ ck_swlock_write_latch(ck_swlock_t *rw) ck_pr_fence_atomic_load(); - while (ck_pr_cas_32(&rw->n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) { - /* Stall until readers have seen the latch and cleared */ + /* Stall until readers have seen the latch and cleared. */ + while (ck_pr_cas_32(&rw->n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) ck_pr_stall(); - } return; } @@ -140,9 +139,7 @@ ck_swlock_write_unlatch(ck_swlock_t *rw) { ck_pr_store_32(&rw->n_readers, 0); - ck_swlock_write_unlock(rw); - return; } @@ -157,9 +154,10 @@ ck_swlock_read_trylock(ck_swlock_t *rw) if (ck_pr_load_32(&rw->writer) != 0) return false; - if (ck_pr_faa_32(&rw->n_readers, 1) & CK_SWLOCK_LATCH_BIT) { + ck_pr_fence_load_atomic(); + + if (ck_pr_faa_32(&rw->n_readers, 1) & CK_SWLOCK_LATCH_BIT) return false; - } /* * Serialize with respect to concurrent write @@ -211,12 +209,15 @@ ck_swlock_read_latchlock(ck_swlock_t *rw) { for (;;) { - while (ck_pr_load_32(&rw->writer) != 0) ck_pr_stall(); + /* Writer has latched, stall the reader */ if (ck_pr_faa_32(&rw->n_readers, 1) & CK_SWLOCK_LATCH_BIT) { - /* Writer has latched, stall the reader */ + do { + ck_pr_stall(); + } while (ck_pr_load_32(&rw->n_readers) & CK_SWLOCK_LATCH_BIT); + continue; } @@ -243,14 +244,14 @@ ck_swlock_locked_reader(ck_swlock_t *rw) { ck_pr_fence_load(); - return (ck_pr_load_32(&rw->n_readers) & CK_SWLOCK_READER_BITS); + return ck_pr_load_32(&rw->n_readers) & CK_SWLOCK_READER_BITS; } CK_CC_INLINE static void ck_swlock_read_unlock(ck_swlock_t *rw) { - ck_pr_fence_load_atomic(); + ck_pr_fence_release(); ck_pr_dec_32(&rw->n_readers); return; } @@ -275,8 +276,7 @@ ck_swlock_recursive_write_lock(ck_swlock_recursive_t *rw) { ck_pr_store_32(&rw->rw.writer, 1); - - ck_pr_fence_atomic_load(); + ck_pr_fence_store_load(); while (ck_pr_load_32(&rw->rw.n_readers) != 0) ck_pr_stall(); @@ -288,9 +288,9 @@ ck_swlock_recursive_write_lock(ck_swlock_recursive_t *rw) CK_CC_INLINE static void ck_swlock_recursive_write_latch(ck_swlock_recursive_t *rw) { - ck_pr_store_32(&rw->rw.writer, 1); - ck_pr_fence_atomic_load(); + ck_pr_store_32(&rw->rw.writer, 1); + ck_pr_fence_store_load(); while (ck_pr_cas_32(&rw->rw.n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) ck_pr_stall(); @@ -304,8 +304,7 @@ ck_swlock_recursive_write_trylock(ck_swlock_recursive_t *rw) { ck_pr_store_32(&rw->rw.writer, 1); - - ck_pr_fence_atomic_load(); + ck_pr_fence_store_load(); if (ck_pr_load_32(&rw->rw.n_readers) != 0) { ck_pr_store_32(&rw->rw.writer, 0); @@ -320,21 +319,20 @@ CK_CC_INLINE static void ck_swlock_recursive_write_unlock(ck_swlock_recursive_t *rw) { - if (--rw->wc == 0) { - ck_pr_fence_release(); - ck_pr_store_32(&rw->rw.writer, 0); - } + if (--rw->wc != 0) + return; + ck_pr_fence_release(); + ck_pr_store_32(&rw->rw.writer, 0); return; } CK_CC_INLINE static void ck_swlock_recursive_write_unlatch(ck_swlock_recursive_t *rw) { - ck_pr_store_32(&rw->rw.n_readers, 0); + ck_pr_store_32(&rw->rw.n_readers, 0); ck_swlock_recursive_write_unlock(rw); - return; } From a1daff18ea3e266cf3798f4b7a58f9ca378d94bb Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Fri, 18 Apr 2014 15:33:18 -0400 Subject: [PATCH 03/31] ck_swlock: Switch to TATAS style loop for latch operations. --- include/ck_swlock.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index ceafe85..22f4647 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -124,12 +124,14 @@ ck_swlock_write_latch(ck_swlock_t *rw) { ck_pr_store_32(&rw->writer, 1); - ck_pr_fence_atomic_load(); /* Stall until readers have seen the latch and cleared. */ - while (ck_pr_cas_32(&rw->n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) - ck_pr_stall(); + while (ck_pr_cas_32(&rw->n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) { + do { + ck_pr_stall(); + } while (ck_pr_load_uint(&rw->n_readers) != 0); + } return; } @@ -292,8 +294,11 @@ ck_swlock_recursive_write_latch(ck_swlock_recursive_t *rw) ck_pr_store_32(&rw->rw.writer, 1); ck_pr_fence_store_load(); - while (ck_pr_cas_32(&rw->rw.n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) - ck_pr_stall(); + while (ck_pr_cas_32(&rw->rw.n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) { + do { + ck_pr_stall(); + } while (ck_pr_load_uint(&rw->n_readers) != 0); + } rw->wc++; return; From 6fe2bba224bacafea4686960f0312888f1a67e2b Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Fri, 18 Apr 2014 15:33:51 -0400 Subject: [PATCH 04/31] ck_swlock: Fix-up TATAS loop. --- include/ck_swlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 22f4647..d1ca01f 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -297,7 +297,7 @@ ck_swlock_recursive_write_latch(ck_swlock_recursive_t *rw) while (ck_pr_cas_32(&rw->rw.n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) { do { ck_pr_stall(); - } while (ck_pr_load_uint(&rw->n_readers) != 0); + } while (ck_pr_load_uint(&rw->rw.n_readers) != 0); } rw->wc++; From 63b79c7b1e29303b42f9867c1c25ee693588ced1 Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Fri, 18 Apr 2014 15:34:19 -0400 Subject: [PATCH 05/31] ck_swlock: Fix Copyright ordering. --- include/ck_swlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index d1ca01f..de83999 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -1,6 +1,6 @@ /* - * Copyright 2014 Samy Al Bahra. * Copyright 2014 Jaidev Sridhar. + * Copyright 2014 Samy Al Bahra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without From bfc9837d4f3d6da1ffabc47c2f8326d43f995caf Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Fri, 18 Apr 2014 15:36:04 -0400 Subject: [PATCH 06/31] ck_swlock: Whitespace change. --- include/ck_swlock.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index de83999..06bd14a 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -91,7 +91,6 @@ ck_swlock_write_trylock(ck_swlock_t *rw) { ck_pr_store_32(&rw->writer, 1); - ck_pr_fence_atomic_load(); if (ck_pr_load_32(&rw->n_readers) != 0) { @@ -110,7 +109,6 @@ ck_swlock_write_lock(ck_swlock_t *rw) { ck_pr_store_32(&rw->writer, 1); - ck_pr_fence_atomic_load(); while (ck_pr_load_32(&rw->n_readers) != 0) From 1997acde2204e7cb36f6da1ad4bd19e3d3015eb5 Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Fri, 18 Apr 2014 15:36:16 -0400 Subject: [PATCH 07/31] regressions: Remove optimization flags from ck_swlock. --- regressions/ck_swlock/validate/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regressions/ck_swlock/validate/Makefile b/regressions/ck_swlock/validate/Makefile index a4f31fd..54d62f2 100644 --- a/regressions/ck_swlock/validate/Makefile +++ b/regressions/ck_swlock/validate/Makefile @@ -14,4 +14,4 @@ clean: rm -rf *.dSYM *.exe *~ *.o $(OBJECTS) include ../../../build/regressions.build -CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE -O0 +CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE From 240d30c8c498c8d716c44046931ce8d8a26f8006 Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Fri, 18 Apr 2014 15:40:09 -0400 Subject: [PATCH 08/31] regressions/ck_swlock: Fix-up style and Copyright. --- regressions/ck_swlock/validate/Makefile | 2 +- regressions/ck_swlock/validate/validate.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/regressions/ck_swlock/validate/Makefile b/regressions/ck_swlock/validate/Makefile index 54d62f2..a4f31fd 100644 --- a/regressions/ck_swlock/validate/Makefile +++ b/regressions/ck_swlock/validate/Makefile @@ -14,4 +14,4 @@ clean: rm -rf *.dSYM *.exe *~ *.o $(OBJECTS) include ../../../build/regressions.build -CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE +CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE -O0 diff --git a/regressions/ck_swlock/validate/validate.c b/regressions/ck_swlock/validate/validate.c index 8999236..28250eb 100644 --- a/regressions/ck_swlock/validate/validate.c +++ b/regressions/ck_swlock/validate/validate.c @@ -1,5 +1,5 @@ /* - * Copyright 2011-2014 Samy Al Bahra. + * Copyright 2014 Jaidev Sridhar. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -131,7 +131,7 @@ thread(void *arg) { unsigned int i = ITERATE; unsigned int l; - int tid = *(int *) arg; + int tid = *(int *)arg; if (aff_iterate(&a)) { perror("ERROR: Could not affine thread"); From 1f03809acb82966885f03535fb67dd532670e6b4 Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Fri, 18 Apr 2014 21:32:42 +0000 Subject: [PATCH 09/31] ck_swlock: We shouldn't decrement n_readers when ck_swlock_read_latchlocks observers a writer if the unlatch operation sets n_readers to 0. The unlatch operation now just unsets the latch bit, we can safely decrement n_readers in ck_swlock_read_latchlocks(). + Fixes to validation tests & ELIDE coverage. --- include/ck_swlock.h | 31 ++- regressions/ck_swlock/validate/Makefile | 2 +- regressions/ck_swlock/validate/validate.c | 223 +++++++++++++++++++++- 3 files changed, 243 insertions(+), 13 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 92fd16e..d500c1e 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -139,7 +139,13 @@ CK_CC_INLINE static void ck_swlock_write_unlatch(ck_swlock_t *rw) { - ck_pr_store_32(&rw->n_readers, 0); + uint32_t snapshot = ck_pr_load_32(&rw->n_readers); + uint32_t delta = snapshot & CK_SWLOCK_READER_BITS; + + while (ck_pr_cas_32_value(&rw->n_readers, snapshot, delta, &snapshot) == false) { + delta = snapshot & CK_SWLOCK_READER_BITS; + ck_pr_stall(); + } ck_swlock_write_unlock(rw); @@ -209,14 +215,15 @@ ck_swlock_read_lock(ck_swlock_t *rw) CK_CC_INLINE static void ck_swlock_read_latchlock(ck_swlock_t *rw) { - + uint32_t n, w; for (;;) { - - while (ck_pr_load_32(&rw->writer) != 0) + ck_pr_fence_atomic_load(); + while ((w = ck_pr_load_32(&rw->writer)) != 0) { ck_pr_stall(); + } - if (ck_pr_faa_32(&rw->n_readers, 1) & CK_SWLOCK_LATCH_BIT) { - /* Writer has latched, stall the reader */ + if ((n = ck_pr_faa_32(&rw->n_readers, 1)) & CK_SWLOCK_LATCH_BIT) { + ck_pr_dec_32(&rw->n_readers); continue; } @@ -278,7 +285,7 @@ ck_swlock_recursive_write_lock(ck_swlock_recursive_t *rw) ck_pr_fence_atomic_load(); - while (ck_pr_load_32(&rw->rw.n_readers) != 0) + while (ck_pr_load_32(&rw->rw.n_readers) & CK_SWLOCK_READER_BITS != 0) ck_pr_stall(); rw->wc++; @@ -307,7 +314,7 @@ ck_swlock_recursive_write_trylock(ck_swlock_recursive_t *rw) ck_pr_fence_atomic_load(); - if (ck_pr_load_32(&rw->rw.n_readers) != 0) { + if (ck_pr_load_32(&rw->rw.n_readers) & CK_SWLOCK_READER_BITS != 0) { ck_pr_store_32(&rw->rw.writer, 0); return false; } @@ -331,7 +338,13 @@ ck_swlock_recursive_write_unlock(ck_swlock_recursive_t *rw) CK_CC_INLINE static void ck_swlock_recursive_write_unlatch(ck_swlock_recursive_t *rw) { - ck_pr_store_32(&rw->rw.n_readers, 0); + uint32_t snapshot = ck_pr_load_32(&rw->rw.n_readers); + uint32_t delta = snapshot & CK_SWLOCK_READER_BITS; + + while (ck_pr_cas_32_value(&rw->rw.n_readers, snapshot, delta, &snapshot) == false) { + delta = snapshot & CK_SWLOCK_READER_BITS; + ck_pr_stall(); + } ck_swlock_recursive_write_unlock(rw); diff --git a/regressions/ck_swlock/validate/Makefile b/regressions/ck_swlock/validate/Makefile index a4f31fd..5b5c788 100644 --- a/regressions/ck_swlock/validate/Makefile +++ b/regressions/ck_swlock/validate/Makefile @@ -14,4 +14,4 @@ clean: rm -rf *.dSYM *.exe *~ *.o $(OBJECTS) include ../../../build/regressions.build -CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE -O0 +CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE -O0 -g diff --git a/regressions/ck_swlock/validate/validate.c b/regressions/ck_swlock/validate/validate.c index 8999236..a8eb09a 100644 --- a/regressions/ck_swlock/validate/validate.c +++ b/regressions/ck_swlock/validate/validate.c @@ -125,13 +125,222 @@ thread_recursive(void *arg) return (NULL); } +#ifdef CK_F_PR_RTM +static void * +thread_rtm_adaptive(void *null CK_CC_UNUSED) +{ + unsigned int i = ITERATE; + unsigned int l; + struct ck_elide_config config = CK_ELIDE_CONFIG_DEFAULT_INITIALIZER; + struct ck_elide_stat st = CK_ELIDE_STAT_INITIALIZER; + + if (aff_iterate(&a)) { + perror("ERROR: Could not affine thread"); + exit(EXIT_FAILURE); + } + + while (i--) { + CK_ELIDE_LOCK_ADAPTIVE(ck_swlock_write, &st, &config, &lock); + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } + + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + + l = ck_pr_load_uint(&locked); + if (l != 8) { + ck_error("ERROR [WR:%d]: %u != 2\n", __LINE__, l); + } + + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } + } + CK_ELIDE_UNLOCK_ADAPTIVE(ck_swlock_write, &st, &lock); + + CK_ELIDE_LOCK(ck_swlock_read, &lock); + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [RD:%d]: %u != 0\n", __LINE__, l); + } + } + CK_ELIDE_UNLOCK(ck_swlock_read, &lock); + } + + return NULL; +} + +static void * +thread_rtm_mix(void *null CK_CC_UNUSED) +{ + unsigned int i = ITERATE; + unsigned int l; + + if (aff_iterate(&a)) { + perror("ERROR: Could not affine thread"); + exit(EXIT_FAILURE); + } + + while (i--) { + if (i & 1) { + CK_ELIDE_LOCK(ck_swlock_write, &lock); + } else { + ck_swlock_write_lock(&lock); + } + + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } + + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + + l = ck_pr_load_uint(&locked); + if (l != 8) { + ck_error("ERROR [WR:%d]: %u != 2\n", __LINE__, l); + } + + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } + } + + if (i & 1) { + CK_ELIDE_UNLOCK(ck_swlock_write, &lock); + } else { + ck_swlock_write_unlock(&lock); + } + + if (i & 1) { + CK_ELIDE_LOCK(ck_swlock_read, &lock); + } else { + ck_swlock_read_lock(&lock); + } + + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [RD:%d]: %u != 0\n", __LINE__, l); + } + } + + if (i & 1) { + CK_ELIDE_UNLOCK(ck_swlock_read, &lock); + } else { + ck_swlock_read_unlock(&lock); + } + } + + return (NULL); +} + +static void * +thread_rtm(void *null CK_CC_UNUSED) +{ + unsigned int i = ITERATE; + unsigned int l; + + if (aff_iterate(&a)) { + perror("ERROR: Could not affine thread"); + exit(EXIT_FAILURE); + } + + while (i--) { + CK_ELIDE_LOCK(ck_swlock_write, &lock); + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } + + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + + l = ck_pr_load_uint(&locked); + if (l != 8) { + ck_error("ERROR [WR:%d]: %u != 2\n", __LINE__, l); + } + + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } + } + CK_ELIDE_UNLOCK(ck_swlock_write, &lock); + + CK_ELIDE_LOCK(ck_swlock_read, &lock); + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [RD:%d]: %u != 0\n", __LINE__, l); + } + } + CK_ELIDE_UNLOCK(ck_swlock_read, &lock); + } + + return (NULL); +} +#endif /* CK_F_PR_RTM */ static void * thread(void *arg) { unsigned int i = ITERATE; unsigned int l; - int tid = *(int *) arg; + int tid = ck_pr_load_int(arg); if (aff_iterate(&a)) { perror("ERROR: Could not affine thread"); @@ -141,6 +350,7 @@ thread(void *arg) while (i--) { if (tid == 0) { /* Writer */ + fflush(stdin); ck_swlock_write_latch(&lock); { l = ck_pr_load_uint(&locked); @@ -195,11 +405,12 @@ thread(void *arg) static void swlock_test(pthread_t *threads, void *(*f)(void *), const char *test) { - int i; + int i, tid[nthr]; fprintf(stderr, "Creating threads (%s)...", test); for (i = 0; i < nthr; i++) { - if (pthread_create(&threads[i], NULL, f, &i)) { + ck_pr_store_int(&tid[i], i); + if (pthread_create(&threads[i], NULL, f, &tid[i])) { ck_error("ERROR: Could not create thread %d\n", i); } } @@ -233,6 +444,12 @@ main(int argc, char *argv[]) a.delta = atoi(argv[2]); swlock_test(threads, thread, "regular"); + +#ifdef CK_F_PR_RTM + swlock_test(threads, thread_rtm, "rtm"); + swlock_test(threads, thread_rtm_mix, "rtm-mix"); + swlock_test(threads, thread_rtm_adaptive, "rtm-adaptive"); +#endif swlock_test(threads, thread_recursive, "recursive"); return 0; } From 3412f135df01afd0b6df51f26c98eaaa2243e13d Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Fri, 18 Apr 2014 21:57:13 +0000 Subject: [PATCH 10/31] ck_swlock: Fix makefile --- regressions/ck_swlock/validate/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regressions/ck_swlock/validate/Makefile b/regressions/ck_swlock/validate/Makefile index a4f31fd..54d62f2 100644 --- a/regressions/ck_swlock/validate/Makefile +++ b/regressions/ck_swlock/validate/Makefile @@ -14,4 +14,4 @@ clean: rm -rf *.dSYM *.exe *~ *.o $(OBJECTS) include ../../../build/regressions.build -CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE -O0 +CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE From bf8779ab13f1d8563dd1f61541e16e5a5c53943a Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Fri, 18 Apr 2014 22:02:34 +0000 Subject: [PATCH 11/31] ck_swlock: Decrement n_readers in TATAS style loop. --- include/ck_swlock.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index e67c433..63040c4 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -220,6 +220,7 @@ ck_swlock_read_latchlock(ck_swlock_t *rw) /* Writer has latched, stall the reader */ if (ck_pr_faa_32(&rw->n_readers, 1) & CK_SWLOCK_LATCH_BIT) { + ck_pr_dec_32(&rw->n_readers); do { ck_pr_stall(); } while (ck_pr_load_32(&rw->n_readers) & CK_SWLOCK_LATCH_BIT); From c7dc66bf0c8de7d4c58345da70db520d219117fc Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Fri, 18 Apr 2014 22:17:15 +0000 Subject: [PATCH 12/31] ck_swlock: Validation for normal write lock --- regressions/ck_swlock/validate/validate.c | 72 ++++++++++++++++++++++- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/regressions/ck_swlock/validate/validate.c b/regressions/ck_swlock/validate/validate.c index 15c6a26..dfe68b5 100644 --- a/regressions/ck_swlock/validate/validate.c +++ b/regressions/ck_swlock/validate/validate.c @@ -336,7 +336,7 @@ thread_rtm(void *null CK_CC_UNUSED) #endif /* CK_F_PR_RTM */ static void * -thread(void *arg) +thread_latch(void *arg) { unsigned int i = ITERATE; unsigned int l; @@ -392,7 +392,73 @@ thread(void *arg) { l = ck_pr_load_uint(&locked); if (l != 0) { - ck_error("ERROR [RD:%d]: %u != 0 r:%x w:%d\n", __LINE__, l, lock.n_readers, lock.writer); + ck_error("ERROR [RD:%d]: %u != 0\n", __LINE__, l); + } + } + ck_swlock_read_unlock(&lock); + } + + return (NULL); +} + +static void * +thread(void *arg) +{ + unsigned int i = ITERATE; + unsigned int l; + int tid = ck_pr_load_int(arg); + + if (aff_iterate(&a)) { + perror("ERROR: Could not affine thread"); + exit(EXIT_FAILURE); + } + + while (i--) { + if (tid == 0) { + /* Writer */ + ck_swlock_write_lock(&lock); + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } + + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + + l = ck_pr_load_uint(&locked); + if (l != 8) { + ck_error("ERROR [WR:%d]: %u != 2\n", __LINE__, l); + } + + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } + } + ck_swlock_write_unlock(&lock); + } + + ck_swlock_read_lock(&lock); + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [RD:%d]: %u != 0\n", __LINE__, l); } } ck_swlock_read_unlock(&lock); @@ -443,7 +509,7 @@ main(int argc, char *argv[]) a.delta = atoi(argv[2]); swlock_test(threads, thread, "regular"); - + swlock_test(threads, thread_latch, "latch"); #ifdef CK_F_PR_RTM swlock_test(threads, thread_rtm, "rtm"); swlock_test(threads, thread_rtm_mix, "rtm-mix"); From 0ee31c15577830a77ab8b6ac415fa7e6175a3de0 Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Fri, 18 Apr 2014 22:41:23 +0000 Subject: [PATCH 13/31] ck_swlock: Need barrier --- include/ck_swlock.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 63040c4..cabced2 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -188,6 +188,7 @@ CK_CC_INLINE static void ck_swlock_read_lock(ck_swlock_t *rw) { + ck_pr_fence_atomic_load(); for (;;) { while (ck_pr_load_32(&rw->writer) != 0) ck_pr_stall(); @@ -214,6 +215,8 @@ ck_swlock_read_lock(ck_swlock_t *rw) CK_CC_INLINE static void ck_swlock_read_latchlock(ck_swlock_t *rw) { + + ck_pr_fence_atomic_load(); for (;;) { while (ck_pr_load_32(&rw->writer) != 0) ck_pr_stall(); From 207194359f2c168458821e7c4905d4f0b2b5927b Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Fri, 18 Apr 2014 22:55:58 +0000 Subject: [PATCH 14/31] ck_swlock: Convert ELIDE tests to single writer --- regressions/ck_swlock/validate/validate.c | 215 +++++++++++----------- 1 file changed, 112 insertions(+), 103 deletions(-) diff --git a/regressions/ck_swlock/validate/validate.c b/regressions/ck_swlock/validate/validate.c index dfe68b5..6fc2603 100644 --- a/regressions/ck_swlock/validate/validate.c +++ b/regressions/ck_swlock/validate/validate.c @@ -55,7 +55,7 @@ thread_recursive(void *arg) { int i = ITERATE; unsigned int l; - unsigned int tid = *(int *) arg; + int tid = ck_pr_load_int(arg); if (aff_iterate(&a)) { perror("ERROR: Could not affine thread"); @@ -127,10 +127,12 @@ thread_recursive(void *arg) #ifdef CK_F_PR_RTM static void * -thread_rtm_adaptive(void *null CK_CC_UNUSED) +thread_rtm_adaptive(void *arg) { unsigned int i = ITERATE; unsigned int l; + int tid = ck_pr_load_int(arg); + struct ck_elide_config config = CK_ELIDE_CONFIG_DEFAULT_INITIALIZER; struct ck_elide_stat st = CK_ELIDE_STAT_INITIALIZER; @@ -140,42 +142,44 @@ thread_rtm_adaptive(void *null CK_CC_UNUSED) } while (i--) { - CK_ELIDE_LOCK_ADAPTIVE(ck_swlock_write, &st, &config, &lock); - { - l = ck_pr_load_uint(&locked); - if (l != 0) { - ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); - } + if (tid == 0) { + CK_ELIDE_LOCK_ADAPTIVE(ck_swlock_write, &st, &config, &lock); + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); - l = ck_pr_load_uint(&locked); - if (l != 8) { - ck_error("ERROR [WR:%d]: %u != 2\n", __LINE__, l); - } + l = ck_pr_load_uint(&locked); + if (l != 8) { + ck_error("ERROR [WR:%d]: %u != 2\n", __LINE__, l); + } - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); - l = ck_pr_load_uint(&locked); - if (l != 0) { - ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } } + CK_ELIDE_UNLOCK_ADAPTIVE(ck_swlock_write, &st, &lock); } - CK_ELIDE_UNLOCK_ADAPTIVE(ck_swlock_write, &st, &lock); CK_ELIDE_LOCK(ck_swlock_read, &lock); { @@ -191,10 +195,11 @@ thread_rtm_adaptive(void *null CK_CC_UNUSED) } static void * -thread_rtm_mix(void *null CK_CC_UNUSED) +thread_rtm_mix(void *arg) { unsigned int i = ITERATE; unsigned int l; + int tid = ck_pr_load_int(arg); if (aff_iterate(&a)) { perror("ERROR: Could not affine thread"); @@ -202,53 +207,54 @@ thread_rtm_mix(void *null CK_CC_UNUSED) } while (i--) { - if (i & 1) { - CK_ELIDE_LOCK(ck_swlock_write, &lock); - } else { - ck_swlock_write_lock(&lock); - } - - { - l = ck_pr_load_uint(&locked); - if (l != 0) { - ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + if (tid == 0) { + if (i & 1) { + CK_ELIDE_LOCK(ck_swlock_write, &lock); + } else { + ck_swlock_write_lock(&lock); } - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } - l = ck_pr_load_uint(&locked); - if (l != 8) { - ck_error("ERROR [WR:%d]: %u != 2\n", __LINE__, l); - } + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); + l = ck_pr_load_uint(&locked); + if (l != 8) { + ck_error("ERROR [WR:%d]: %u != 2\n", __LINE__, l); + } - l = ck_pr_load_uint(&locked); - if (l != 0) { - ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } } - } - if (i & 1) { - CK_ELIDE_UNLOCK(ck_swlock_write, &lock); - } else { - ck_swlock_write_unlock(&lock); + if (i & 1) { + CK_ELIDE_UNLOCK(ck_swlock_write, &lock); + } else { + ck_swlock_write_unlock(&lock); + } } - if (i & 1) { CK_ELIDE_LOCK(ck_swlock_read, &lock); } else { @@ -273,10 +279,11 @@ thread_rtm_mix(void *null CK_CC_UNUSED) } static void * -thread_rtm(void *null CK_CC_UNUSED) +thread_rtm(void *arg) { unsigned int i = ITERATE; unsigned int l; + int tid = ck_pr_load_int(arg); if (aff_iterate(&a)) { perror("ERROR: Could not affine thread"); @@ -284,42 +291,44 @@ thread_rtm(void *null CK_CC_UNUSED) } while (i--) { - CK_ELIDE_LOCK(ck_swlock_write, &lock); - { - l = ck_pr_load_uint(&locked); - if (l != 0) { - ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); - } + if (tid == 0) { + CK_ELIDE_LOCK(ck_swlock_write, &lock); + { + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); + ck_pr_inc_uint(&locked); - l = ck_pr_load_uint(&locked); - if (l != 8) { - ck_error("ERROR [WR:%d]: %u != 2\n", __LINE__, l); - } + l = ck_pr_load_uint(&locked); + if (l != 8) { + ck_error("ERROR [WR:%d]: %u != 2\n", __LINE__, l); + } - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); + ck_pr_dec_uint(&locked); - l = ck_pr_load_uint(&locked); - if (l != 0) { - ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + l = ck_pr_load_uint(&locked); + if (l != 0) { + ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); + } } + CK_ELIDE_UNLOCK(ck_swlock_write, &lock); } - CK_ELIDE_UNLOCK(ck_swlock_write, &lock); CK_ELIDE_LOCK(ck_swlock_read, &lock); { From 27a79623a6f4a42fd6fa21735e53d20f63b07f99 Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Fri, 18 Apr 2014 23:11:16 +0000 Subject: [PATCH 15/31] ck_swlock: Need to lock cache-line on write lock --- include/ck_swlock.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index cabced2..3e98ee4 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -90,7 +90,7 @@ CK_CC_INLINE static bool ck_swlock_write_trylock(ck_swlock_t *rw) { - ck_pr_store_32(&rw->writer, 1); + ck_pr_fas_32(&rw->writer, 1); ck_pr_fence_atomic_load(); if (ck_pr_load_32(&rw->n_readers) != 0) { @@ -108,7 +108,7 @@ CK_CC_INLINE static void ck_swlock_write_lock(ck_swlock_t *rw) { - ck_pr_store_32(&rw->writer, 1); + ck_pr_fas_32(&rw->writer, 1); ck_pr_fence_atomic_load(); while (ck_pr_load_32(&rw->n_readers) != 0) @@ -121,7 +121,7 @@ CK_CC_INLINE static void ck_swlock_write_latch(ck_swlock_t *rw) { - ck_pr_store_32(&rw->writer, 1); + ck_pr_fas_32(&rw->writer, 1); ck_pr_fence_atomic_load(); /* Stall until readers have seen the latch and cleared. */ @@ -188,7 +188,6 @@ CK_CC_INLINE static void ck_swlock_read_lock(ck_swlock_t *rw) { - ck_pr_fence_atomic_load(); for (;;) { while (ck_pr_load_32(&rw->writer) != 0) ck_pr_stall(); @@ -216,7 +215,6 @@ CK_CC_INLINE static void ck_swlock_read_latchlock(ck_swlock_t *rw) { - ck_pr_fence_atomic_load(); for (;;) { while (ck_pr_load_32(&rw->writer) != 0) ck_pr_stall(); @@ -285,7 +283,7 @@ CK_CC_INLINE static void ck_swlock_recursive_write_lock(ck_swlock_recursive_t *rw) { - ck_pr_store_32(&rw->rw.writer, 1); + ck_pr_fas_32(&rw->rw.writer, 1); ck_pr_fence_store_load(); while (ck_pr_load_32(&rw->rw.n_readers) & CK_SWLOCK_READER_BITS != 0) @@ -298,7 +296,7 @@ ck_swlock_recursive_write_lock(ck_swlock_recursive_t *rw) CK_CC_INLINE static void ck_swlock_recursive_write_latch(ck_swlock_recursive_t *rw) { - ck_pr_store_32(&rw->rw.writer, 1); + ck_pr_fas_32(&rw->rw.writer, 1); ck_pr_fence_store_load(); while (ck_pr_cas_32(&rw->rw.n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) { @@ -315,7 +313,7 @@ CK_CC_INLINE static bool ck_swlock_recursive_write_trylock(ck_swlock_recursive_t *rw) { - ck_pr_store_32(&rw->rw.writer, 1); + ck_pr_fas_32(&rw->rw.writer, 1); ck_pr_fence_store_load(); if (ck_pr_load_32(&rw->rw.n_readers) & CK_SWLOCK_READER_BITS != 0) { From ca70ce684cae6a6b6985841af6134ecf6badf5ee Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Sat, 19 Apr 2014 17:50:51 -0400 Subject: [PATCH 16/31] ck_swlock: Unlatch operation should be wait-free. --- include/ck_swlock.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 3e98ee4..2a6339f 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -122,8 +122,8 @@ ck_swlock_write_latch(ck_swlock_t *rw) { ck_pr_fas_32(&rw->writer, 1); - ck_pr_fence_atomic_load(); - + ck_pr_fence_atomic(); + /* Stall until readers have seen the latch and cleared. */ while (ck_pr_cas_32(&rw->n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) { do { @@ -138,14 +138,7 @@ CK_CC_INLINE static void ck_swlock_write_unlatch(ck_swlock_t *rw) { - uint32_t snapshot = ck_pr_load_32(&rw->n_readers); - uint32_t delta = snapshot & CK_SWLOCK_READER_BITS; - - while (ck_pr_cas_32_value(&rw->n_readers, snapshot, delta, &snapshot) == false) { - delta = snapshot & CK_SWLOCK_READER_BITS; - ck_pr_stall(); - } - + ck_pr_and_32(&rw->n_readers, CK_SWLOCK_READER_BITS); ck_swlock_write_unlock(rw); return; } @@ -222,6 +215,7 @@ ck_swlock_read_latchlock(ck_swlock_t *rw) /* Writer has latched, stall the reader */ if (ck_pr_faa_32(&rw->n_readers, 1) & CK_SWLOCK_LATCH_BIT) { ck_pr_dec_32(&rw->n_readers); + do { ck_pr_stall(); } while (ck_pr_load_32(&rw->n_readers) & CK_SWLOCK_LATCH_BIT); From 15a0485c01cdb53940782651104a54f42604015f Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Sat, 19 Apr 2014 20:55:45 -0400 Subject: [PATCH 17/31] ck_swlock: make ck_swlock_recursive_write_unlatch() wait-free. --- include/ck_swlock.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 2a6339f..85471f4 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -334,14 +334,8 @@ ck_swlock_recursive_write_unlock(ck_swlock_recursive_t *rw) CK_CC_INLINE static void ck_swlock_recursive_write_unlatch(ck_swlock_recursive_t *rw) { - uint32_t snapshot = ck_pr_load_32(&rw->rw.n_readers); - uint32_t delta = snapshot & CK_SWLOCK_READER_BITS; - - while (ck_pr_cas_32_value(&rw->rw.n_readers, snapshot, delta, &snapshot) == false) { - delta = snapshot & CK_SWLOCK_READER_BITS; - ck_pr_stall(); - } + ck_pr_and_32(&rw->rw.n_readers, CK_SWLOCK_READER_BITS); ck_swlock_recursive_write_unlock(rw); return; } From 0f44d50e33ac15e29a0c023d8de69777ba66d46b Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Mon, 21 Apr 2014 18:51:22 +0000 Subject: [PATCH 18/31] ck_swlock: Use single word for lock bits --- include/ck_swlock.h | 195 ++++++++----------- regressions/ck_swlock/benchmark/throughput.c | 2 +- regressions/ck_swlock/validate/validate.c | 4 +- 3 files changed, 85 insertions(+), 116 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 85471f4..4236363 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -35,21 +35,20 @@ #include struct ck_swlock { - uint32_t writer; - uint32_t n_readers; + uint32_t lock_bits; }; typedef struct ck_swlock ck_swlock_t; -#define CK_SWLOCK_INITIALIZER {0, 0} -#define CK_SWLOCK_LATCH_BIT (1UL << 31) -#define CK_SWLOCK_READER_BITS (UINT32_MAX ^ CK_SWLOCK_LATCH_BIT) +#define CK_SWLOCK_INITIALIZER {0} +#define CK_SWLOCK_WRITER_BIT (1UL << 31) +#define CK_SWLOCK_LATCH_BIT (1UL << 30) +#define CK_SWLOCK_READER_BITS (UINT32_MAX ^ (CK_SWLOCK_LATCH_BIT | CK_SWLOCK_WRITER_BIT)) CK_CC_INLINE static void ck_swlock_init(struct ck_swlock *rw) { - rw->writer = 0; - rw->n_readers = 0; + rw->lock_bits = 0; return; } @@ -57,8 +56,7 @@ CK_CC_INLINE static void ck_swlock_write_unlock(ck_swlock_t *rw) { - ck_pr_fence_release(); - ck_pr_store_32(&rw->writer, 0); + ck_pr_and_32(&rw->lock_bits, CK_SWLOCK_READER_BITS); return; } @@ -66,15 +64,14 @@ CK_CC_INLINE static bool ck_swlock_locked_writer(ck_swlock_t *rw) { - ck_pr_fence_load(); - return ck_pr_load_32(&rw->writer); + return (ck_pr_load_32(&rw->lock_bits) & CK_SWLOCK_WRITER_BIT); } CK_CC_INLINE static void ck_swlock_write_downgrade(ck_swlock_t *rw) { - ck_pr_inc_32(&rw->n_readers); + ck_pr_inc_32(&rw->lock_bits); ck_swlock_write_unlock(rw); return; } @@ -83,17 +80,16 @@ CK_CC_INLINE static bool ck_swlock_locked(ck_swlock_t *rw) { - return ck_pr_load_32(&rw->n_readers) | ck_pr_load_32(&rw->writer); + return ck_pr_load_32(&rw->lock_bits); } CK_CC_INLINE static bool ck_swlock_write_trylock(ck_swlock_t *rw) { - ck_pr_fas_32(&rw->writer, 1); - ck_pr_fence_atomic_load(); + ck_pr_or_32(&rw->lock_bits, CK_SWLOCK_WRITER_BIT); - if (ck_pr_load_32(&rw->n_readers) != 0) { + if ((ck_pr_load_32(&rw->lock_bits) & CK_SWLOCK_READER_BITS) != 0) { ck_swlock_write_unlock(rw); return false; } @@ -108,10 +104,9 @@ CK_CC_INLINE static void ck_swlock_write_lock(ck_swlock_t *rw) { - ck_pr_fas_32(&rw->writer, 1); - ck_pr_fence_atomic_load(); + ck_pr_or_32(&rw->lock_bits, CK_SWLOCK_WRITER_BIT); - while (ck_pr_load_32(&rw->n_readers) != 0) + while ((ck_pr_load_32(&rw->lock_bits) & CK_SWLOCK_READER_BITS) != 0) ck_pr_stall(); return; @@ -121,14 +116,13 @@ CK_CC_INLINE static void ck_swlock_write_latch(ck_swlock_t *rw) { - ck_pr_fas_32(&rw->writer, 1); - ck_pr_fence_atomic(); - + ck_pr_or_32(&rw->lock_bits, CK_SWLOCK_WRITER_BIT); + /* Stall until readers have seen the latch and cleared. */ - while (ck_pr_cas_32(&rw->n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) { + while (ck_pr_cas_32(&rw->lock_bits, CK_SWLOCK_WRITER_BIT, (CK_SWLOCK_WRITER_BIT | CK_SWLOCK_LATCH_BIT)) == false) { do { ck_pr_stall(); - } while (ck_pr_load_uint(&rw->n_readers) != 0); + } while (ck_pr_load_uint(&rw->lock_bits) != CK_SWLOCK_WRITER_BIT); } return; @@ -138,8 +132,7 @@ CK_CC_INLINE static void ck_swlock_write_unlatch(ck_swlock_t *rw) { - ck_pr_and_32(&rw->n_readers, CK_SWLOCK_READER_BITS); - ck_swlock_write_unlock(rw); + ck_pr_store_32(&rw->lock_bits, 0); return; } @@ -151,27 +144,12 @@ CK_CC_INLINE static bool ck_swlock_read_trylock(ck_swlock_t *rw) { - if (ck_pr_load_32(&rw->writer) != 0) + if (ck_pr_faa_32(&rw->lock_bits, 1) & CK_SWLOCK_WRITER_BIT) { + ck_pr_dec_32(&rw->lock_bits); return false; - - ck_pr_fence_load_atomic(); - - if (ck_pr_faa_32(&rw->n_readers, 1) & CK_SWLOCK_LATCH_BIT) - return false; - - /* - * Serialize with respect to concurrent write - * lock operation. - */ - ck_pr_fence_atomic_load(); - - if (ck_pr_load_32(&rw->writer) == 0) { - ck_pr_fence_load(); - return true; - } - - ck_pr_dec_32(&rw->n_readers); - return false; + } + + return true; } CK_ELIDE_TRYLOCK_PROTOTYPE(ck_swlock_read, ck_swlock_t, @@ -181,62 +159,61 @@ CK_CC_INLINE static void ck_swlock_read_lock(ck_swlock_t *rw) { + uint32_t l; for (;;) { - while (ck_pr_load_32(&rw->writer) != 0) + while (ck_pr_load_32(&rw->lock_bits) & CK_SWLOCK_WRITER_BIT) ck_pr_stall(); - ck_pr_inc_32(&rw->n_readers); - - /* - * Serialize with respect to concurrent write - * lock operation. - */ - ck_pr_fence_atomic_load(); - - if (ck_pr_load_32(&rw->writer) == 0) - break; + l = ck_pr_faa_32(&rw->lock_bits, 1); - ck_pr_dec_32(&rw->n_readers); + if (!(l & CK_SWLOCK_WRITER_BIT)) + return; + + ck_pr_dec_32(&rw->lock_bits); } - /* Acquire semantics are necessary. */ - ck_pr_fence_load(); return; } -CK_CC_INLINE static void -ck_swlock_read_latchlock(ck_swlock_t *rw) +CK_CC_INLINE static bool +ck_swlock_read_trylatchlock(ck_swlock_t *rw) { - for (;;) { - while (ck_pr_load_32(&rw->writer) != 0) - ck_pr_stall(); + uint32_t l = ck_pr_load_32(&rw->lock_bits); - /* Writer has latched, stall the reader */ - if (ck_pr_faa_32(&rw->n_readers, 1) & CK_SWLOCK_LATCH_BIT) { - ck_pr_dec_32(&rw->n_readers); + if (l & CK_SWLOCK_WRITER_BIT) + return false; - do { - ck_pr_stall(); - } while (ck_pr_load_32(&rw->n_readers) & CK_SWLOCK_LATCH_BIT); + l = ck_pr_faa_32(&rw->lock_bits, 1); - continue; - } + if (!(l & CK_SWLOCK_WRITER_BIT)) + return true; + + if (!(l & CK_SWLOCK_LATCH_BIT)) + ck_pr_dec_32(&rw->lock_bits); + + return false; +} - /* - * Serialize with respect to concurrent write - * lock operation. - */ - ck_pr_fence_atomic_load(); - if (ck_pr_load_32(&rw->writer) == 0) - break; +CK_CC_INLINE static void +ck_swlock_read_latchlock(ck_swlock_t *rw) +{ + + uint32_t l; + for (;;) { + while (ck_pr_load_32(&rw->lock_bits) & CK_SWLOCK_WRITER_BIT) + ck_pr_stall(); + + l = ck_pr_faa_32(&rw->lock_bits, 1); + + if (!(l & (CK_SWLOCK_LATCH_BIT | CK_SWLOCK_WRITER_BIT))) + return; - ck_pr_dec_32(&rw->n_readers); + if (!(l & CK_SWLOCK_LATCH_BIT)) + ck_pr_dec_32(&rw->lock_bits); } - /* Acquire semantics are necessary. */ - ck_pr_fence_load(); return; } @@ -245,16 +222,14 @@ CK_CC_INLINE static bool ck_swlock_locked_reader(ck_swlock_t *rw) { - ck_pr_fence_load(); - return ck_pr_load_32(&rw->n_readers) & CK_SWLOCK_READER_BITS; + return ck_pr_load_32(&rw->lock_bits) & CK_SWLOCK_READER_BITS; } CK_CC_INLINE static void ck_swlock_read_unlock(ck_swlock_t *rw) { - ck_pr_fence_release(); - ck_pr_dec_32(&rw->n_readers); + ck_pr_dec_32(&rw->lock_bits); return; } @@ -277,28 +252,23 @@ CK_CC_INLINE static void ck_swlock_recursive_write_lock(ck_swlock_recursive_t *rw) { - ck_pr_fas_32(&rw->rw.writer, 1); - ck_pr_fence_store_load(); - - while (ck_pr_load_32(&rw->rw.n_readers) & CK_SWLOCK_READER_BITS != 0) - ck_pr_stall(); + if (++rw->wc != 1) { + return; + } - rw->wc++; + ck_swlock_write_lock(&rw->rw); return; } +/* + * In recursive mode, latch must be the inner-most acquisition + */ + CK_CC_INLINE static void ck_swlock_recursive_write_latch(ck_swlock_recursive_t *rw) { - ck_pr_fas_32(&rw->rw.writer, 1); - ck_pr_fence_store_load(); - - while (ck_pr_cas_32(&rw->rw.n_readers, 0, CK_SWLOCK_LATCH_BIT) == false) { - do { - ck_pr_stall(); - } while (ck_pr_load_uint(&rw->rw.n_readers) != 0); - } + ck_swlock_write_latch(&rw->rw); rw->wc++; return; } @@ -307,16 +277,12 @@ CK_CC_INLINE static bool ck_swlock_recursive_write_trylock(ck_swlock_recursive_t *rw) { - ck_pr_fas_32(&rw->rw.writer, 1); - ck_pr_fence_store_load(); - - if (ck_pr_load_32(&rw->rw.n_readers) & CK_SWLOCK_READER_BITS != 0) { - ck_pr_store_32(&rw->rw.writer, 0); - return false; + if (ck_swlock_write_trylock(&rw->rw) == true) { + rw->wc++; + return true; } - rw->wc++; - return true; + return false; } CK_CC_INLINE static void @@ -326,8 +292,7 @@ ck_swlock_recursive_write_unlock(ck_swlock_recursive_t *rw) if (--rw->wc != 0) return; - ck_pr_fence_release(); - ck_pr_store_32(&rw->rw.writer, 0); + ck_swlock_write_unlock(&rw->rw); return; } @@ -335,8 +300,12 @@ CK_CC_INLINE static void ck_swlock_recursive_write_unlatch(ck_swlock_recursive_t *rw) { - ck_pr_and_32(&rw->rw.n_readers, CK_SWLOCK_READER_BITS); - ck_swlock_recursive_write_unlock(rw); + if (--rw->wc != 0) { + ck_pr_store_32(&rw->rw.lock_bits, CK_SWLOCK_WRITER_BIT); + return; + } + + ck_pr_store_32(&rw->rw.lock_bits, 0); return; } diff --git a/regressions/ck_swlock/benchmark/throughput.c b/regressions/ck_swlock/benchmark/throughput.c index ba6bfd2..1b79f82 100644 --- a/regressions/ck_swlock/benchmark/throughput.c +++ b/regressions/ck_swlock/benchmark/throughput.c @@ -242,7 +242,7 @@ main(int argc, char *argv[]) d = atoi(argv[1]); swlock_test(p, d, latency, thread_lock, "swlock"); - swlock_test(p, d, latency, thread_latchlock, "swlock"); + swlock_test(p, d, latency, thread_latchlock, "swlock_latchlock"); return 0; } diff --git a/regressions/ck_swlock/validate/validate.c b/regressions/ck_swlock/validate/validate.c index 6fc2603..d7561ef 100644 --- a/regressions/ck_swlock/validate/validate.c +++ b/regressions/ck_swlock/validate/validate.c @@ -69,8 +69,8 @@ thread_recursive(void *arg) ck_pr_stall(); ck_swlock_recursive_write_lock(&r_lock); - ck_swlock_recursive_write_latch(&r_lock); ck_swlock_recursive_write_lock(&r_lock); + ck_swlock_recursive_write_latch(&r_lock); { l = ck_pr_load_uint(&locked); @@ -106,10 +106,10 @@ thread_recursive(void *arg) ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); } } - ck_swlock_recursive_write_unlock(&r_lock); ck_swlock_recursive_write_unlatch(&r_lock); ck_swlock_recursive_write_unlock(&r_lock); ck_swlock_recursive_write_unlock(&r_lock); + ck_swlock_recursive_write_unlock(&r_lock); } ck_swlock_recursive_read_latchlock(&r_lock); From e6feed1f89905ecaf720b7c6a8819b5d275c9b90 Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Mon, 21 Apr 2014 14:54:35 -0400 Subject: [PATCH 19/31] ck_swlock: Rename lock field --- include/ck_swlock.h | 58 ++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 4236363..29ac407 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -35,7 +35,7 @@ #include struct ck_swlock { - uint32_t lock_bits; + uint32_t value; }; typedef struct ck_swlock ck_swlock_t; @@ -48,7 +48,7 @@ CK_CC_INLINE static void ck_swlock_init(struct ck_swlock *rw) { - rw->lock_bits = 0; + rw->value = 0; return; } @@ -56,7 +56,7 @@ CK_CC_INLINE static void ck_swlock_write_unlock(ck_swlock_t *rw) { - ck_pr_and_32(&rw->lock_bits, CK_SWLOCK_READER_BITS); + ck_pr_and_32(&rw->value, CK_SWLOCK_READER_BITS); return; } @@ -64,14 +64,14 @@ CK_CC_INLINE static bool ck_swlock_locked_writer(ck_swlock_t *rw) { - return (ck_pr_load_32(&rw->lock_bits) & CK_SWLOCK_WRITER_BIT); + return (ck_pr_load_32(&rw->value) & CK_SWLOCK_WRITER_BIT); } CK_CC_INLINE static void ck_swlock_write_downgrade(ck_swlock_t *rw) { - ck_pr_inc_32(&rw->lock_bits); + ck_pr_inc_32(&rw->value); ck_swlock_write_unlock(rw); return; } @@ -80,16 +80,16 @@ CK_CC_INLINE static bool ck_swlock_locked(ck_swlock_t *rw) { - return ck_pr_load_32(&rw->lock_bits); + return ck_pr_load_32(&rw->value); } CK_CC_INLINE static bool ck_swlock_write_trylock(ck_swlock_t *rw) { - ck_pr_or_32(&rw->lock_bits, CK_SWLOCK_WRITER_BIT); + ck_pr_or_32(&rw->value, CK_SWLOCK_WRITER_BIT); - if ((ck_pr_load_32(&rw->lock_bits) & CK_SWLOCK_READER_BITS) != 0) { + if ((ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_BITS) != 0) { ck_swlock_write_unlock(rw); return false; } @@ -104,9 +104,9 @@ CK_CC_INLINE static void ck_swlock_write_lock(ck_swlock_t *rw) { - ck_pr_or_32(&rw->lock_bits, CK_SWLOCK_WRITER_BIT); + ck_pr_or_32(&rw->value, CK_SWLOCK_WRITER_BIT); - while ((ck_pr_load_32(&rw->lock_bits) & CK_SWLOCK_READER_BITS) != 0) + while ((ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_BITS) != 0) ck_pr_stall(); return; @@ -116,13 +116,13 @@ CK_CC_INLINE static void ck_swlock_write_latch(ck_swlock_t *rw) { - ck_pr_or_32(&rw->lock_bits, CK_SWLOCK_WRITER_BIT); + ck_pr_or_32(&rw->value, CK_SWLOCK_WRITER_BIT); /* Stall until readers have seen the latch and cleared. */ - while (ck_pr_cas_32(&rw->lock_bits, CK_SWLOCK_WRITER_BIT, (CK_SWLOCK_WRITER_BIT | CK_SWLOCK_LATCH_BIT)) == false) { + while (ck_pr_cas_32(&rw->value, CK_SWLOCK_WRITER_BIT, (CK_SWLOCK_WRITER_BIT | CK_SWLOCK_LATCH_BIT)) == false) { do { ck_pr_stall(); - } while (ck_pr_load_uint(&rw->lock_bits) != CK_SWLOCK_WRITER_BIT); + } while (ck_pr_load_uint(&rw->value) != CK_SWLOCK_WRITER_BIT); } return; @@ -132,7 +132,7 @@ CK_CC_INLINE static void ck_swlock_write_unlatch(ck_swlock_t *rw) { - ck_pr_store_32(&rw->lock_bits, 0); + ck_pr_store_32(&rw->value, 0); return; } @@ -144,8 +144,8 @@ CK_CC_INLINE static bool ck_swlock_read_trylock(ck_swlock_t *rw) { - if (ck_pr_faa_32(&rw->lock_bits, 1) & CK_SWLOCK_WRITER_BIT) { - ck_pr_dec_32(&rw->lock_bits); + if (ck_pr_faa_32(&rw->value, 1) & CK_SWLOCK_WRITER_BIT) { + ck_pr_dec_32(&rw->value); return false; } @@ -161,15 +161,15 @@ ck_swlock_read_lock(ck_swlock_t *rw) uint32_t l; for (;;) { - while (ck_pr_load_32(&rw->lock_bits) & CK_SWLOCK_WRITER_BIT) + while (ck_pr_load_32(&rw->value) & CK_SWLOCK_WRITER_BIT) ck_pr_stall(); - l = ck_pr_faa_32(&rw->lock_bits, 1); + l = ck_pr_faa_32(&rw->value, 1); if (!(l & CK_SWLOCK_WRITER_BIT)) return; - ck_pr_dec_32(&rw->lock_bits); + ck_pr_dec_32(&rw->value); } return; @@ -179,18 +179,18 @@ CK_CC_INLINE static bool ck_swlock_read_trylatchlock(ck_swlock_t *rw) { - uint32_t l = ck_pr_load_32(&rw->lock_bits); + uint32_t l = ck_pr_load_32(&rw->value); if (l & CK_SWLOCK_WRITER_BIT) return false; - l = ck_pr_faa_32(&rw->lock_bits, 1); + l = ck_pr_faa_32(&rw->value, 1); if (!(l & CK_SWLOCK_WRITER_BIT)) return true; if (!(l & CK_SWLOCK_LATCH_BIT)) - ck_pr_dec_32(&rw->lock_bits); + ck_pr_dec_32(&rw->value); return false; } @@ -202,16 +202,16 @@ ck_swlock_read_latchlock(ck_swlock_t *rw) uint32_t l; for (;;) { - while (ck_pr_load_32(&rw->lock_bits) & CK_SWLOCK_WRITER_BIT) + while (ck_pr_load_32(&rw->value) & CK_SWLOCK_WRITER_BIT) ck_pr_stall(); - l = ck_pr_faa_32(&rw->lock_bits, 1); + l = ck_pr_faa_32(&rw->value, 1); if (!(l & (CK_SWLOCK_LATCH_BIT | CK_SWLOCK_WRITER_BIT))) return; if (!(l & CK_SWLOCK_LATCH_BIT)) - ck_pr_dec_32(&rw->lock_bits); + ck_pr_dec_32(&rw->value); } return; @@ -222,14 +222,14 @@ CK_CC_INLINE static bool ck_swlock_locked_reader(ck_swlock_t *rw) { - return ck_pr_load_32(&rw->lock_bits) & CK_SWLOCK_READER_BITS; + return ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_BITS; } CK_CC_INLINE static void ck_swlock_read_unlock(ck_swlock_t *rw) { - ck_pr_dec_32(&rw->lock_bits); + ck_pr_dec_32(&rw->value); return; } @@ -301,11 +301,11 @@ ck_swlock_recursive_write_unlatch(ck_swlock_recursive_t *rw) { if (--rw->wc != 0) { - ck_pr_store_32(&rw->rw.lock_bits, CK_SWLOCK_WRITER_BIT); + ck_pr_store_32(&rw->rw.value, CK_SWLOCK_WRITER_BIT); return; } - ck_pr_store_32(&rw->rw.lock_bits, 0); + ck_pr_store_32(&rw->rw.value, 0); return; } From 4471ea2df88be090a01c5bd2506f461d379a0f56 Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Mon, 21 Apr 2014 15:47:18 -0400 Subject: [PATCH 20/31] ck_swlock: Acquire semantics --- include/ck_swlock.h | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 29ac407..3719b01 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -49,6 +49,7 @@ ck_swlock_init(struct ck_swlock *rw) { rw->value = 0; + ck_pr_barrier(); return; } @@ -56,6 +57,7 @@ CK_CC_INLINE static void ck_swlock_write_unlock(ck_swlock_t *rw) { + ck_pr_fence_release(); ck_pr_and_32(&rw->value, CK_SWLOCK_READER_BITS); return; } @@ -64,7 +66,8 @@ CK_CC_INLINE static bool ck_swlock_locked_writer(ck_swlock_t *rw) { - return (ck_pr_load_32(&rw->value) & CK_SWLOCK_WRITER_BIT); + ck_pr_fence_load(); + return ck_pr_load_32(&rw->value) & CK_SWLOCK_WRITER_BIT; } CK_CC_INLINE static void @@ -80,6 +83,7 @@ CK_CC_INLINE static bool ck_swlock_locked(ck_swlock_t *rw) { + ck_pr_fence_load(); return ck_pr_load_32(&rw->value); } @@ -94,6 +98,7 @@ ck_swlock_write_trylock(ck_swlock_t *rw) return false; } + ck_pr_fence_acquire(); return true; } @@ -105,6 +110,7 @@ ck_swlock_write_lock(ck_swlock_t *rw) { ck_pr_or_32(&rw->value, CK_SWLOCK_WRITER_BIT); + ck_pr_fence_acquire(); while ((ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_BITS) != 0) ck_pr_stall(); @@ -117,9 +123,11 @@ ck_swlock_write_latch(ck_swlock_t *rw) { ck_pr_or_32(&rw->value, CK_SWLOCK_WRITER_BIT); - + ck_pr_fence_acquire(); + /* Stall until readers have seen the latch and cleared. */ - while (ck_pr_cas_32(&rw->value, CK_SWLOCK_WRITER_BIT, (CK_SWLOCK_WRITER_BIT | CK_SWLOCK_LATCH_BIT)) == false) { + while (ck_pr_cas_32(&rw->value, CK_SWLOCK_WRITER_BIT, + (CK_SWLOCK_WRITER_BIT | CK_SWLOCK_LATCH_BIT)) == false) { do { ck_pr_stall(); } while (ck_pr_load_uint(&rw->value) != CK_SWLOCK_WRITER_BIT); @@ -132,6 +140,7 @@ CK_CC_INLINE static void ck_swlock_write_unlatch(ck_swlock_t *rw) { + ck_pr_fence_release(); ck_pr_store_32(&rw->value, 0); return; } @@ -148,7 +157,8 @@ ck_swlock_read_trylock(ck_swlock_t *rw) ck_pr_dec_32(&rw->value); return false; } - + + ck_pr_fence_acquire(); return true; } @@ -158,7 +168,6 @@ CK_ELIDE_TRYLOCK_PROTOTYPE(ck_swlock_read, ck_swlock_t, CK_CC_INLINE static void ck_swlock_read_lock(ck_swlock_t *rw) { - uint32_t l; for (;;) { while (ck_pr_load_32(&rw->value) & CK_SWLOCK_WRITER_BIT) @@ -166,6 +175,8 @@ ck_swlock_read_lock(ck_swlock_t *rw) l = ck_pr_faa_32(&rw->value, 1); + ck_pr_fence_acquire(); + if (!(l & CK_SWLOCK_WRITER_BIT)) return; @@ -178,14 +189,15 @@ ck_swlock_read_lock(ck_swlock_t *rw) CK_CC_INLINE static bool ck_swlock_read_trylatchlock(ck_swlock_t *rw) { - uint32_t l = ck_pr_load_32(&rw->value); if (l & CK_SWLOCK_WRITER_BIT) return false; l = ck_pr_faa_32(&rw->value, 1); - + + ck_pr_fence_acquire(); + if (!(l & CK_SWLOCK_WRITER_BIT)) return true; @@ -199,14 +211,15 @@ ck_swlock_read_trylatchlock(ck_swlock_t *rw) CK_CC_INLINE static void ck_swlock_read_latchlock(ck_swlock_t *rw) { - uint32_t l; for (;;) { while (ck_pr_load_32(&rw->value) & CK_SWLOCK_WRITER_BIT) ck_pr_stall(); l = ck_pr_faa_32(&rw->value, 1); - + + ck_pr_fence_acquire(); + if (!(l & (CK_SWLOCK_LATCH_BIT | CK_SWLOCK_WRITER_BIT))) return; @@ -222,6 +235,7 @@ CK_CC_INLINE static bool ck_swlock_locked_reader(ck_swlock_t *rw) { + ck_pr_fence_load(); return ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_BITS; } @@ -229,6 +243,7 @@ CK_CC_INLINE static void ck_swlock_read_unlock(ck_swlock_t *rw) { + ck_pr_fence_release(); ck_pr_dec_32(&rw->value); return; } From c246865ce79253148f8c1301248762e1c5fbd349 Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Mon, 21 Apr 2014 15:49:31 -0400 Subject: [PATCH 21/31] ck_swlock: Removed comment --- include/ck_swlock.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 3719b01..0a66ffb 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -275,10 +275,6 @@ ck_swlock_recursive_write_lock(ck_swlock_recursive_t *rw) return; } -/* - * In recursive mode, latch must be the inner-most acquisition - */ - CK_CC_INLINE static void ck_swlock_recursive_write_latch(ck_swlock_recursive_t *rw) { From e2f7df0f1ddd9ecd559d5c741f9cd9306345fa05 Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Mon, 21 Apr 2014 16:33:27 -0400 Subject: [PATCH 22/31] ck_swlock: Various improvements. - Remove recursive locks. - Remove unnecessary fences. - Add necessary fences. - Simplify state machine. --- include/ck_swlock.h | 169 +++++----------------- regressions/ck_swlock/validate/validate.c | 77 ---------- 2 files changed, 35 insertions(+), 211 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 0a66ffb..494b330 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -39,10 +39,11 @@ struct ck_swlock { }; typedef struct ck_swlock ck_swlock_t; -#define CK_SWLOCK_INITIALIZER {0} -#define CK_SWLOCK_WRITER_BIT (1UL << 31) -#define CK_SWLOCK_LATCH_BIT (1UL << 30) -#define CK_SWLOCK_READER_BITS (UINT32_MAX ^ (CK_SWLOCK_LATCH_BIT | CK_SWLOCK_WRITER_BIT)) +#define CK_SWLOCK_INITIALIZER {0} +#define CK_SWLOCK_WRITER_BIT (1UL << 31) +#define CK_SWLOCK_LATCH_BIT (1UL << 30) +#define CK_SWLOCK_WRITER_MASK (CK_SWLOCK_LATCH_BIT | CK_SWLOCK_WRITER_BIT) +#define CK_SWLOCK_READER_BITS (UINT32_MAX ^ CK_SWLOCK_WRITER_MASK) CK_CC_INLINE static void ck_swlock_init(struct ck_swlock *rw) @@ -93,7 +94,7 @@ ck_swlock_write_trylock(ck_swlock_t *rw) ck_pr_or_32(&rw->value, CK_SWLOCK_WRITER_BIT); - if ((ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_BITS) != 0) { + if (ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_BITS) { ck_swlock_write_unlock(rw); return false; } @@ -110,11 +111,10 @@ ck_swlock_write_lock(ck_swlock_t *rw) { ck_pr_or_32(&rw->value, CK_SWLOCK_WRITER_BIT); - ck_pr_fence_acquire(); - - while ((ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_BITS) != 0) + while (ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_BITS) ck_pr_stall(); + ck_pr_fence_acquire(); return; } @@ -122,17 +122,14 @@ CK_CC_INLINE static void ck_swlock_write_latch(ck_swlock_t *rw) { - ck_pr_or_32(&rw->value, CK_SWLOCK_WRITER_BIT); - ck_pr_fence_acquire(); - /* Stall until readers have seen the latch and cleared. */ - while (ck_pr_cas_32(&rw->value, CK_SWLOCK_WRITER_BIT, - (CK_SWLOCK_WRITER_BIT | CK_SWLOCK_LATCH_BIT)) == false) { + while (ck_pr_cas_32(&rw->value, 0, CK_SWLOCK_WRITER_MASK) == false) { do { ck_pr_stall(); - } while (ck_pr_load_uint(&rw->value) != CK_SWLOCK_WRITER_BIT); + } while (ck_pr_load_uint(&rw->value) != 0); } + ck_pr_fence_acquire(); return; } @@ -169,20 +166,22 @@ CK_CC_INLINE static void ck_swlock_read_lock(ck_swlock_t *rw) { uint32_t l; + for (;;) { while (ck_pr_load_32(&rw->value) & CK_SWLOCK_WRITER_BIT) ck_pr_stall(); l = ck_pr_faa_32(&rw->value, 1); - ck_pr_fence_acquire(); + if (l & CK_SWLOCK_WRITER_BIT) { + ck_pr_dec_32(&rw->value); + continue; + } - if (!(l & CK_SWLOCK_WRITER_BIT)) - return; - - ck_pr_dec_32(&rw->value); + break; } + ck_pr_fence_acquire(); return; } @@ -194,39 +193,41 @@ ck_swlock_read_trylatchlock(ck_swlock_t *rw) if (l & CK_SWLOCK_WRITER_BIT) return false; - l = ck_pr_faa_32(&rw->value, 1); - - ck_pr_fence_acquire(); - - if (!(l & CK_SWLOCK_WRITER_BIT)) + l = ck_pr_faa_32(&rw->value, 1) & CK_SWLOCK_WRITER_MASK; + if (l == 0) { + ck_pr_fence_acquire(); return true; + } - if (!(l & CK_SWLOCK_LATCH_BIT)) + if (l == CK_SWLOCK_WRITER_BIT) ck_pr_dec_32(&rw->value); - + return false; } - CK_CC_INLINE static void ck_swlock_read_latchlock(ck_swlock_t *rw) { uint32_t l; + for (;;) { while (ck_pr_load_32(&rw->value) & CK_SWLOCK_WRITER_BIT) ck_pr_stall(); - l = ck_pr_faa_32(&rw->value, 1); - - ck_pr_fence_acquire(); + l = ck_pr_faa_32(&rw->value, 1) & CK_SWLOCK_WRITER_MASK; + if (l == 0) + break; - if (!(l & (CK_SWLOCK_LATCH_BIT | CK_SWLOCK_WRITER_BIT))) - return; - - if (!(l & CK_SWLOCK_LATCH_BIT)) + /* + * If the latch bit has not been sent, then the writer would + * have observed the reader and will wait to completion of + * read-side critical section. + */ + if (l == CK_SWLOCK_WRITER_BIT) ck_pr_dec_32(&rw->value); } + ck_pr_fence_acquire(); return; } @@ -252,105 +253,5 @@ CK_ELIDE_PROTOTYPE(ck_swlock_read, ck_swlock_t, ck_swlock_locked_writer, ck_swlock_read_lock, ck_swlock_locked_reader, ck_swlock_read_unlock) -/* - * Recursive writer reader-writer lock implementation. - */ -struct ck_swlock_recursive { - struct ck_swlock rw; - uint32_t wc; -}; -typedef struct ck_swlock_recursive ck_swlock_recursive_t; - -#define CK_SWLOCK_RECURSIVE_INITIALIZER {CK_SWLOCK_INITIALIZER, 0} - -CK_CC_INLINE static void -ck_swlock_recursive_write_lock(ck_swlock_recursive_t *rw) -{ - - if (++rw->wc != 1) { - return; - } - - ck_swlock_write_lock(&rw->rw); - return; -} - -CK_CC_INLINE static void -ck_swlock_recursive_write_latch(ck_swlock_recursive_t *rw) -{ - - ck_swlock_write_latch(&rw->rw); - rw->wc++; - return; -} - -CK_CC_INLINE static bool -ck_swlock_recursive_write_trylock(ck_swlock_recursive_t *rw) -{ - - if (ck_swlock_write_trylock(&rw->rw) == true) { - rw->wc++; - return true; - } - - return false; -} - -CK_CC_INLINE static void -ck_swlock_recursive_write_unlock(ck_swlock_recursive_t *rw) -{ - - if (--rw->wc != 0) - return; - - ck_swlock_write_unlock(&rw->rw); - return; -} - -CK_CC_INLINE static void -ck_swlock_recursive_write_unlatch(ck_swlock_recursive_t *rw) -{ - - if (--rw->wc != 0) { - ck_pr_store_32(&rw->rw.value, CK_SWLOCK_WRITER_BIT); - return; - } - - ck_pr_store_32(&rw->rw.value, 0); - return; -} - - -CK_CC_INLINE static void -ck_swlock_recursive_read_lock(ck_swlock_recursive_t *rw) -{ - - ck_swlock_read_lock(&rw->rw); - return; -} - -CK_CC_INLINE static void -ck_swlock_recursive_read_latchlock(ck_swlock_recursive_t *rw) -{ - - ck_swlock_read_latchlock(&rw->rw); - return; -} - -CK_CC_INLINE static bool -ck_swlock_recursive_read_trylock(ck_swlock_recursive_t *rw) -{ - - return ck_swlock_read_trylock(&rw->rw); -} - -CK_CC_INLINE static void -ck_swlock_recursive_read_unlock(ck_swlock_recursive_t *rw) -{ - - ck_swlock_read_unlock(&rw->rw); - return; -} - #endif /* _CK_SWLOCK_H */ diff --git a/regressions/ck_swlock/validate/validate.c b/regressions/ck_swlock/validate/validate.c index d7561ef..acec7fe 100644 --- a/regressions/ck_swlock/validate/validate.c +++ b/regressions/ck_swlock/validate/validate.c @@ -48,82 +48,6 @@ static struct affinity a; static unsigned int locked; static int nthr; static ck_swlock_t lock = CK_SWLOCK_INITIALIZER; -static ck_swlock_recursive_t r_lock = CK_SWLOCK_RECURSIVE_INITIALIZER; - -static void * -thread_recursive(void *arg) -{ - int i = ITERATE; - unsigned int l; - int tid = ck_pr_load_int(arg); - - if (aff_iterate(&a)) { - perror("ERROR: Could not affine thread"); - exit(EXIT_FAILURE); - } - - while (i--) { - if (tid == 0) { - /* Writer */ - while (ck_swlock_recursive_write_trylock(&r_lock) == false) - ck_pr_stall(); - - ck_swlock_recursive_write_lock(&r_lock); - ck_swlock_recursive_write_lock(&r_lock); - ck_swlock_recursive_write_latch(&r_lock); - - { - l = ck_pr_load_uint(&locked); - if (l != 0) { - ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); - } - - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - ck_pr_inc_uint(&locked); - - l = ck_pr_load_uint(&locked); - if (l != 8) { - ck_error("ERROR [WR:%d]: %u != 2\n", __LINE__, l); - } - - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - ck_pr_dec_uint(&locked); - - l = ck_pr_load_uint(&locked); - if (l != 0) { - ck_error("ERROR [WR:%d]: %u != 0\n", __LINE__, l); - } - } - ck_swlock_recursive_write_unlatch(&r_lock); - ck_swlock_recursive_write_unlock(&r_lock); - ck_swlock_recursive_write_unlock(&r_lock); - ck_swlock_recursive_write_unlock(&r_lock); - } - - ck_swlock_recursive_read_latchlock(&r_lock); - { - l = ck_pr_load_uint(&locked); - if (l != 0) { - ck_error("ERROR [RD:%d]: %u != 0\n", __LINE__, l); - } - } - ck_swlock_recursive_read_unlock(&r_lock); - } - - return (NULL); -} #ifdef CK_F_PR_RTM static void * @@ -524,7 +448,6 @@ main(int argc, char *argv[]) swlock_test(threads, thread_rtm_mix, "rtm-mix"); swlock_test(threads, thread_rtm_adaptive, "rtm-adaptive"); #endif - swlock_test(threads, thread_recursive, "recursive"); return 0; } From bbad068f5a6e419fe8b991aa91a1c247ed3a3df6 Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Mon, 21 Apr 2014 16:47:21 -0400 Subject: [PATCH 23/31] ck_swlock: Make latch operation write-biased. --- include/ck_swlock.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 494b330..3081a3a 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -122,7 +122,10 @@ CK_CC_INLINE static void ck_swlock_write_latch(ck_swlock_t *rw) { - /* Stall until readers have seen the latch and cleared. */ + /* Publish intent to acquire lock. */ + ck_pr_or_32(&rw->value, CK_SWLOCK_WRITER_BIT); + + /* Stall until readers have seen the seen writer and cleared. */ while (ck_pr_cas_32(&rw->value, 0, CK_SWLOCK_WRITER_MASK) == false) { do { ck_pr_stall(); From 4d7f4fff6d7aac77f9d1c0ce6ef56b9013a6f5fa Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Mon, 21 Apr 2014 16:59:32 -0400 Subject: [PATCH 24/31] ck_swlock: Fix latch operation. --- include/ck_swlock.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 3081a3a..402bde5 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -126,10 +126,11 @@ ck_swlock_write_latch(ck_swlock_t *rw) ck_pr_or_32(&rw->value, CK_SWLOCK_WRITER_BIT); /* Stall until readers have seen the seen writer and cleared. */ - while (ck_pr_cas_32(&rw->value, 0, CK_SWLOCK_WRITER_MASK) == false) { + while (ck_pr_cas_32(&rw->value, CK_SWLOCK_WRITER_BIT, + CK_SWLOCK_WRITER_MASK) == false) { do { ck_pr_stall(); - } while (ck_pr_load_uint(&rw->value) != 0); + } while (ck_pr_load_32(&rw->value) != 0); } ck_pr_fence_acquire(); From 217f7a2f32968bf128ee0acc0c73a6b3d52f43cb Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Mon, 21 Apr 2014 17:04:02 -0400 Subject: [PATCH 25/31] ck_swlock: Load spin on write bit. --- include/ck_swlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 402bde5..7c90ce6 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -130,7 +130,7 @@ ck_swlock_write_latch(ck_swlock_t *rw) CK_SWLOCK_WRITER_MASK) == false) { do { ck_pr_stall(); - } while (ck_pr_load_32(&rw->value) != 0); + } while (ck_pr_load_32(&rw->value) != CK_SWLOCK_WRITER_BIT); } ck_pr_fence_acquire(); From b70a5479cadb1d8baa088899c69984f142f74dbc Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Mon, 21 Apr 2014 17:04:45 -0400 Subject: [PATCH 26/31] ck_swlock: snapshot --- include/ck_swlock.h | 50 +++------------------------------------------ 1 file changed, 3 insertions(+), 47 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index 402bde5..76a7ee7 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -91,16 +91,8 @@ ck_swlock_locked(ck_swlock_t *rw) CK_CC_INLINE static bool ck_swlock_write_trylock(ck_swlock_t *rw) { - - ck_pr_or_32(&rw->value, CK_SWLOCK_WRITER_BIT); - - if (ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_BITS) { - ck_swlock_write_unlock(rw); - return false; - } - ck_pr_fence_acquire(); - return true; + return ck_pr_cas_32(&rw->value, 0, CK_SWLOCK_WRITER_BIT); } CK_ELIDE_TRYLOCK_PROTOTYPE(ck_swlock_write, ck_swlock_t, @@ -150,47 +142,11 @@ CK_ELIDE_PROTOTYPE(ck_swlock_write, ck_swlock_t, ck_swlock_locked, ck_swlock_write_lock, ck_swlock_locked_writer, ck_swlock_write_unlock) -CK_CC_INLINE static bool -ck_swlock_read_trylock(ck_swlock_t *rw) -{ - - if (ck_pr_faa_32(&rw->value, 1) & CK_SWLOCK_WRITER_BIT) { - ck_pr_dec_32(&rw->value); - return false; - } - - ck_pr_fence_acquire(); - return true; -} - CK_ELIDE_TRYLOCK_PROTOTYPE(ck_swlock_read, ck_swlock_t, ck_swlock_locked_writer, ck_swlock_read_trylock) -CK_CC_INLINE static void -ck_swlock_read_lock(ck_swlock_t *rw) -{ - uint32_t l; - - for (;;) { - while (ck_pr_load_32(&rw->value) & CK_SWLOCK_WRITER_BIT) - ck_pr_stall(); - - l = ck_pr_faa_32(&rw->value, 1); - - if (l & CK_SWLOCK_WRITER_BIT) { - ck_pr_dec_32(&rw->value); - continue; - } - - break; - } - - ck_pr_fence_acquire(); - return; -} - CK_CC_INLINE static bool -ck_swlock_read_trylatchlock(ck_swlock_t *rw) +ck_swlock_read_trylock(ck_swlock_t *rw) { uint32_t l = ck_pr_load_32(&rw->value); @@ -210,7 +166,7 @@ ck_swlock_read_trylatchlock(ck_swlock_t *rw) } CK_CC_INLINE static void -ck_swlock_read_latchlock(ck_swlock_t *rw) +ck_swlock_read_lock(ck_swlock_t *rw) { uint32_t l; From 7a9d70b59c2b8122ce77e0d417612e5561d8902e Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Mon, 21 Apr 2014 17:07:32 -0400 Subject: [PATCH 27/31] ck_swlock: Update regressions --- regressions/ck_swlock/validate/validate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/regressions/ck_swlock/validate/validate.c b/regressions/ck_swlock/validate/validate.c index acec7fe..da94cac 100644 --- a/regressions/ck_swlock/validate/validate.c +++ b/regressions/ck_swlock/validate/validate.c @@ -321,14 +321,14 @@ thread_latch(void *arg) ck_swlock_write_unlatch(&lock); } - ck_swlock_read_latchlock(&lock); + ck_swlock_read_lock(&lock); { l = ck_pr_load_uint(&locked); if (l != 0) { ck_error("ERROR [RD:%d]: %u != 0\n", __LINE__, l); } } - ck_swlock_read_unlock(&lock); + ck_swlock_read_lock(&lock); } return (NULL); From 11f79b64f72fde6154223d13656e9c1591b2da37 Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Mon, 21 Apr 2014 21:52:21 +0000 Subject: [PATCH 28/31] ck_swlock: cleanup --- include/ck_swlock.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index fef1222..a876b46 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -43,7 +43,8 @@ typedef struct ck_swlock ck_swlock_t; #define CK_SWLOCK_WRITER_BIT (1UL << 31) #define CK_SWLOCK_LATCH_BIT (1UL << 30) #define CK_SWLOCK_WRITER_MASK (CK_SWLOCK_LATCH_BIT | CK_SWLOCK_WRITER_BIT) -#define CK_SWLOCK_READER_BITS (UINT32_MAX ^ CK_SWLOCK_WRITER_MASK) +#define CK_SWLOCK_READER_MASK (UINT32_MAX ^ CK_SWLOCK_WRITER_MASK) + CK_CC_INLINE static void ck_swlock_init(struct ck_swlock *rw) @@ -59,7 +60,7 @@ ck_swlock_write_unlock(ck_swlock_t *rw) { ck_pr_fence_release(); - ck_pr_and_32(&rw->value, CK_SWLOCK_READER_BITS); + ck_pr_and_32(&rw->value, CK_SWLOCK_READER_MASK); return; } @@ -103,7 +104,7 @@ ck_swlock_write_lock(ck_swlock_t *rw) { ck_pr_or_32(&rw->value, CK_SWLOCK_WRITER_BIT); - while (ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_BITS) + while (ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_MASK) ck_pr_stall(); ck_pr_fence_acquire(); @@ -179,7 +180,7 @@ ck_swlock_read_lock(ck_swlock_t *rw) break; /* - * If the latch bit has not been sent, then the writer would + * If the latch bit has not been set, then the writer would * have observed the reader and will wait to completion of * read-side critical section. */ @@ -197,7 +198,7 @@ ck_swlock_locked_reader(ck_swlock_t *rw) { ck_pr_fence_load(); - return ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_BITS; + return ck_pr_load_32(&rw->value) & CK_SWLOCK_READER_MASK; } CK_CC_INLINE static void From 2950565037073521bace3d16e79180d10c0de23c Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Mon, 21 Apr 2014 22:04:02 +0000 Subject: [PATCH 29/31] ck_swlock: Fix regressions --- regressions/ck_swlock/validate/validate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regressions/ck_swlock/validate/validate.c b/regressions/ck_swlock/validate/validate.c index da94cac..bc2f496 100644 --- a/regressions/ck_swlock/validate/validate.c +++ b/regressions/ck_swlock/validate/validate.c @@ -328,7 +328,7 @@ thread_latch(void *arg) ck_error("ERROR [RD:%d]: %u != 0\n", __LINE__, l); } } - ck_swlock_read_lock(&lock); + ck_swlock_read_unlock(&lock); } return (NULL); From 822a3e498eb605bb75b36f61724640b03cfb489d Mon Sep 17 00:00:00 2001 From: Jaidev Sridhar Date: Mon, 21 Apr 2014 22:16:52 +0000 Subject: [PATCH 30/31] regressions: Remove ck_swlock_read_latchlock() from benchmarks --- regressions/ck_swlock/benchmark/latency.c | 14 ----- regressions/ck_swlock/benchmark/throughput.c | 66 -------------------- 2 files changed, 80 deletions(-) diff --git a/regressions/ck_swlock/benchmark/latency.c b/regressions/ck_swlock/benchmark/latency.c index aeb3e8a..7261546 100644 --- a/regressions/ck_swlock/benchmark/latency.c +++ b/regressions/ck_swlock/benchmark/latency.c @@ -81,20 +81,6 @@ main(void) e_b = rdtsc(); printf(" LATCH: swlock %15" PRIu64 "\n", (e_b - s_b) / STEPS); - for (i = 0; i < STEPS; i++) { - ck_swlock_read_latchlock(&swlock); - ck_swlock_read_unlock(&swlock); - } - - s_b = rdtsc(); - for (i = 0; i < STEPS; i++) { - ck_swlock_read_latchlock(&swlock); - ck_swlock_read_unlock(&swlock); - } - e_b = rdtsc(); - printf(" READ_LATCHLOCK: swlock %15" PRIu64 "\n", (e_b - s_b) / STEPS); - - return 0; } diff --git a/regressions/ck_swlock/benchmark/throughput.c b/regressions/ck_swlock/benchmark/throughput.c index 1b79f82..fa3cf1c 100644 --- a/regressions/ck_swlock/benchmark/throughput.c +++ b/regressions/ck_swlock/benchmark/throughput.c @@ -114,71 +114,6 @@ thread_lock(void *pun) return NULL; } -static void * -thread_latchlock(void *pun) -{ - uint64_t s_b, e_b, a, i; - uint64_t *value = pun; - - if (aff_iterate(&affinity) != 0) { - perror("ERROR: Could not affine thread"); - exit(EXIT_FAILURE); - } - - ck_pr_inc_int(&barrier); - while (ck_pr_load_int(&barrier) != threads) - ck_pr_stall(); - - for (i = 1, a = 0;; i++) { - s_b = rdtsc(); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - ck_swlock_read_latchlock(&rw.lock); - ck_swlock_read_unlock(&rw.lock); - e_b = rdtsc(); - - a += (e_b - s_b) >> 4; - - if (ck_pr_load_uint(&flag) == 1) - break; - } - - ck_pr_inc_int(&barrier); - while (ck_pr_load_int(&barrier) != threads * 2) - ck_pr_stall(); - - *value = (a / i); - return NULL; -} - static void swlock_test(pthread_t *p, int d, uint64_t *latency, void *(*f)(void *), const char *label) { @@ -242,7 +177,6 @@ main(int argc, char *argv[]) d = atoi(argv[1]); swlock_test(p, d, latency, thread_lock, "swlock"); - swlock_test(p, d, latency, thread_latchlock, "swlock_latchlock"); return 0; } From 6b2f5130f77197998fa1fa381b3def817ad335b9 Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Tue, 22 Apr 2014 11:14:52 -0400 Subject: [PATCH 31/31] ck_swlock: Whitespace changes. --- include/ck_swlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ck_swlock.h b/include/ck_swlock.h index a876b46..134df6c 100644 --- a/include/ck_swlock.h +++ b/include/ck_swlock.h @@ -45,7 +45,6 @@ typedef struct ck_swlock ck_swlock_t; #define CK_SWLOCK_WRITER_MASK (CK_SWLOCK_LATCH_BIT | CK_SWLOCK_WRITER_BIT) #define CK_SWLOCK_READER_MASK (UINT32_MAX ^ CK_SWLOCK_WRITER_MASK) - CK_CC_INLINE static void ck_swlock_init(struct ck_swlock *rw) { @@ -92,6 +91,7 @@ ck_swlock_locked(ck_swlock_t *rw) CK_CC_INLINE static bool ck_swlock_write_trylock(ck_swlock_t *rw) { + ck_pr_fence_acquire(); return ck_pr_cas_32(&rw->value, 0, CK_SWLOCK_WRITER_BIT); }