From bcaadcf0944a384141dfd907b6aaf7682d65bee2 Mon Sep 17 00:00:00 2001 From: Samy Al Bahra Date: Tue, 22 Feb 2011 22:01:08 -0500 Subject: [PATCH] Factor out some common utility functions. Moved rdtsc and affinity logic to a single file which other regression tests use. Single point of reference will ease porting these to future architectures and platforms. Removed invalid Copyright statement. Added CK_CC_USED to force some code generation that I found useful for debugging. Added ck_stack latency tests and a modified version of djoseph's modifications to benchmark.h for spinlock latency tests. --- include/gcc/ck_cc.h | 1 + regressions/Makefile | 2 + regressions/ck_bytelock/benchmark/latency.c | 32 +--- regressions/ck_bytelock/validate/Makefile | 7 +- regressions/ck_bytelock/validate/serial.c | 145 ------------------ regressions/ck_bytelock/validate/validate.c | 52 +------ regressions/ck_fifo/validate/ck_fifo_mpmc.c | 48 +----- regressions/ck_fifo/validate/ck_fifo_spsc.c | 46 +----- regressions/ck_hp/validate/ck_hp_fifo.c | 48 +----- regressions/ck_hp/validate/nbds_haz_test.c | 50 +----- regressions/ck_pr/benchmark/benchmark.h | 94 +----------- regressions/ck_ring/benchmark/latency.c | 26 +--- regressions/ck_ring/validate/ck_ring_spsc.c | 47 +----- .../ck_ring/validate/ck_ring_spsc_template.c | 47 +----- .../ck_sequence/validate/ck_sequence.c | 47 +----- regressions/ck_spinlock/benchmark/Makefile | 80 +++++++--- .../ck_spinlock/benchmark/benchmark.sh | 32 ---- .../ck_spinlock/benchmark/ck_anderson.c | 8 +- regressions/ck_spinlock/benchmark/ck_cas.c | 8 +- regressions/ck_spinlock/benchmark/ck_clh.c | 7 +- regressions/ck_spinlock/benchmark/ck_dec.c | 7 +- regressions/ck_spinlock/benchmark/ck_fas.c | 7 +- regressions/ck_spinlock/benchmark/ck_mcs.c | 7 +- regressions/ck_spinlock/benchmark/ck_ticket.c | 8 +- .../ck_spinlock/benchmark/ck_ticket_pb.c | 7 +- regressions/ck_spinlock/benchmark/latency.h | 48 ++++++ .../ck_spinlock/benchmark/linux_spinlock.c | 7 +- .../ck_spinlock/benchmark/test_and_set.c | 2 - .../benchmark/{benchmark.h => throughput.h} | 110 +------------ regressions/ck_spinlock/linux_spinlock.h | 5 +- regressions/ck_spinlock/test_and_set.h | 34 ---- regressions/ck_spinlock/validate/Makefile | 5 +- .../ck_spinlock/validate/test_and_set.c | 2 - regressions/ck_spinlock/validate/validate.h | 49 +----- regressions/ck_stack/benchmark/Makefile | 14 ++ regressions/ck_stack/benchmark/latency.c | 125 +++++++++++++++ regressions/ck_stack/validate/benchmark.sh | 39 ----- regressions/ck_stack/validate/pair.c | 50 +----- regressions/ck_stack/validate/pop.c | 53 +------ regressions/ck_stack/validate/push.c | 52 +------ regressions/common.h | 104 +++++++++++++ 41 files changed, 441 insertions(+), 1121 deletions(-) delete mode 100644 regressions/ck_bytelock/validate/serial.c delete mode 100755 regressions/ck_spinlock/benchmark/benchmark.sh create mode 100644 regressions/ck_spinlock/benchmark/latency.h delete mode 100644 regressions/ck_spinlock/benchmark/test_and_set.c rename regressions/ck_spinlock/benchmark/{benchmark.h => throughput.h} (61%) delete mode 100644 regressions/ck_spinlock/test_and_set.h delete mode 100644 regressions/ck_spinlock/validate/test_and_set.c create mode 100644 regressions/ck_stack/benchmark/Makefile create mode 100644 regressions/ck_stack/benchmark/latency.c delete mode 100755 regressions/ck_stack/validate/benchmark.sh create mode 100644 regressions/common.h diff --git a/include/gcc/ck_cc.h b/include/gcc/ck_cc.h index 8fb80b7..6d8dbcd 100644 --- a/include/gcc/ck_cc.h +++ b/include/gcc/ck_cc.h @@ -33,6 +33,7 @@ * Suppress unused warnings. */ #define CK_CC_UNUSED __attribute__((unused)) +#define CK_CC_USED __attribute__((used)) /* * If optimizations are turned on, then force inlining. diff --git a/regressions/Makefile b/regressions/Makefile index 0840bbb..5f27484 100644 --- a/regressions/Makefile +++ b/regressions/Makefile @@ -10,6 +10,7 @@ all: make -C ./ck_bytelock/benchmark all make -C ./ck_sequence/validate all make -C ./ck_stack/validate all + make -C ./ck_stack/benchmark all make -C ./ck_ring/validate all make -C ./ck_hp/validate all @@ -23,6 +24,7 @@ clean: make -C ./ck_bytelock/benchmark clean make -C ./ck_sequence/validate clean make -C ./ck_stack/validate clean + make -C ./ck_stack/benchmark clean make -C ./ck_ring/validate clean make -C ./ck_hp/validate clean diff --git a/regressions/ck_bytelock/benchmark/latency.c b/regressions/ck_bytelock/benchmark/latency.c index c434304..f6459f4 100644 --- a/regressions/ck_bytelock/benchmark/latency.c +++ b/regressions/ck_bytelock/benchmark/latency.c @@ -3,6 +3,8 @@ #include #include +#include "../../common.h" + #ifndef STEPS #define STEPS 1000000 #endif @@ -69,36 +71,6 @@ rwlock_read_unlock(rwlock_t *rw) return; } -static inline uint64_t -rdtsc(void) -{ -#if defined(__x86_64__) || defined(__x86__) - uint32_t eax = 0, edx; - - __asm__ __volatile__("cpuid;" - "rdtsc;" - : "+a" (eax), "=d" (edx) - : - : "%ecx", "%ebx", "memory"); - - __asm__ __volatile__("xorl %%eax, %%eax;" - "cpuid;" - : - : - : "%eax", "%ebx", "%ecx", "%edx", "memory"); - - return (((uint64_t)edx << 32) | eax); -#elif defined(__sparcv9__) - return 0; -#if 0 - uint64_t r; - - __asm__ __volatile__("rd %%ticks, %0" : "=r" (r) :: "memory"); - return r; -#endif -#endif -} - int main(void) { diff --git a/regressions/ck_bytelock/validate/Makefile b/regressions/ck_bytelock/validate/Makefile index 26df016..f59cac8 100644 --- a/regressions/ck_bytelock/validate/Makefile +++ b/regressions/ck_bytelock/validate/Makefile @@ -1,17 +1,14 @@ .PHONY: clean distribution -OBJECTS=serial validate +OBJECTS=validate all: $(OBJECTS) validate: validate.c $(CC) $(CFLAGS) -o validate validate.c -lpthread -serial: serial.c - $(CC) $(CFLAGS) -o serial serial.c - clean: rm -rf *.dSYM *~ *.o $(OBJECTS) include ../../../build/regressions.build -CFLAGS+=-ggdb -D_GNU_SOURCE +CFLAGS+=-D_GNU_SOURCE diff --git a/regressions/ck_bytelock/validate/serial.c b/regressions/ck_bytelock/validate/serial.c deleted file mode 100644 index 9abb955..0000000 --- a/regressions/ck_bytelock/validate/serial.c +++ /dev/null @@ -1,145 +0,0 @@ -#include -#include -#include -#include - -#ifndef STEPS -#define STEPS 100000ULL -#endif - -/* - * This is a naive reader/writer spinlock. - */ -struct rwlock { - unsigned int readers; - ck_spinlock_fas_t writer; -}; -typedef struct rwlock rwlock_t; - -static CK_CC_INLINE void -rwlock_init(rwlock_t *rw) -{ - - ck_pr_store_uint(&rw->readers, 0); - ck_spinlock_fas_init(&rw->writer); - return; -} - -static CK_CC_INLINE void -rwlock_write_lock(rwlock_t *rw) -{ - - ck_spinlock_fas_lock(&rw->writer); - while (ck_pr_load_uint(&rw->readers) != 0) - ck_pr_stall(); - - return; -} - -static CK_CC_INLINE void -rwlock_write_unlock(rwlock_t *rw) -{ - - ck_spinlock_fas_unlock(&rw->writer); - return; -} - -static CK_CC_INLINE void -rwlock_read_lock(rwlock_t *rw) -{ - - for (;;) { - while (ck_pr_load_uint(&rw->writer.value) != 0) - ck_pr_stall(); - - ck_pr_inc_uint(&rw->readers); - if (ck_pr_load_uint(&rw->writer.value) == 0) - break; - ck_pr_dec_uint(&rw->readers); - } - - return; -} - -static CK_CC_INLINE void -rwlock_read_unlock(rwlock_t *rw) -{ - - ck_pr_dec_uint(&rw->readers); - return; -} - - -static inline uint64_t -rdtsc(void) -{ -#if defined(__x86_64__) - uint32_t eax = 0, edx; - - __asm__ __volatile__("cpuid;" - "rdtsc;" - : "+a" (eax), "=d" (edx) - : - : "%rcx", "%rbx", "memory"); - - __asm__ __volatile__("xorl %%eax, %%eax;" - "cpuid;" - : - : - : "%rax", "%rbx", "%rcx", "%rdx", "memory"); - - return (((uint64_t)edx << 32) | eax); -#endif - - return 0; -} - -int -main(void) -{ - uint64_t s_b, e_b; - uint64_t i; - ck_bytelock_t bytelock = CK_BYTELOCK_INITIALIZER; - rwlock_t naive; - - ck_bytelock_write_lock(&bytelock, 1); - ck_bytelock_write_unlock(&bytelock); - - s_b = rdtsc(); - for (i = 0; i < STEPS; i++) { - ck_bytelock_write_lock(&bytelock, 1); - ck_bytelock_write_unlock(&bytelock); - } - e_b = rdtsc(); - printf("WRITE: bytelock %15" PRIu64 "\n", e_b - s_b); - - rwlock_init(&naive); - rwlock_write_lock(&naive); - rwlock_write_unlock(&naive); - - s_b = rdtsc(); - for (i = 0; i < STEPS; i++) { - rwlock_write_lock(&naive); - rwlock_write_unlock(&naive); - } - e_b = rdtsc(); - printf("WRITE: naive %15" PRIu64 "\n", e_b - s_b); - - s_b = rdtsc(); - for (i = 0; i < STEPS; i++) { - ck_bytelock_read_lock(&bytelock, 1); - ck_bytelock_read_unlock(&bytelock, 1); - } - e_b = rdtsc(); - printf("READ: bytelock %15" PRIu64 "\n", e_b - s_b); - - s_b = rdtsc(); - for (i = 0; i < STEPS; i++) { - rwlock_write_lock(&naive); - rwlock_write_unlock(&naive); - } - e_b = rdtsc(); - printf("READ: naive %15" PRIu64 "\n", e_b - s_b); - - return (0); -} diff --git a/regressions/ck_bytelock/validate/validate.c b/regressions/ck_bytelock/validate/validate.c index 5a97767..1e0e931 100644 --- a/regressions/ck_bytelock/validate/validate.c +++ b/regressions/ck_bytelock/validate/validate.c @@ -12,26 +12,12 @@ #include #include -#ifdef __linux__ -#include -#include -#include -#include -#endif - -#ifndef CORES -#define CORES 8 -#endif +#include "../../common.h" #ifndef ITERATE -#define ITERATE 128000 +#define ITERATE 5000000 #endif -struct affinity { - uint32_t delta; - uint32_t request; -}; - struct block { unsigned int tid; }; @@ -39,44 +25,13 @@ struct block { static struct affinity a; static unsigned int locked = 0; static int nthr; - static ck_bytelock_t lock = CK_BYTELOCK_INITIALIZER; -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - static void * thread(void *null) { struct block *context = null; - int i = 1000000; + int i = ITERATE; unsigned int l; if (aff_iterate(&a)) { @@ -170,7 +125,6 @@ main(int argc, char *argv[]) } a.delta = atoi(argv[2]); - a.request = 0; fprintf(stderr, "Creating threads (mutual exclusion)..."); for (i = 0; i < nthr; i++) { diff --git a/regressions/ck_fifo/validate/ck_fifo_mpmc.c b/regressions/ck_fifo/validate/ck_fifo_mpmc.c index 3c12915..375c071 100644 --- a/regressions/ck_fifo/validate/ck_fifo_mpmc.c +++ b/regressions/ck_fifo/validate/ck_fifo_mpmc.c @@ -4,27 +4,13 @@ #include #include -#ifdef CK_F_FIFO_MPMC -#ifdef __linux__ -#include -#include -#include -#include -#endif +#include "../../common.h" +#ifdef CK_F_FIFO_MPMC #ifndef ITERATIONS #define ITERATIONS 128 #endif -#ifndef CORES -#define CORES 8 -#endif - -struct affinity { - uint32_t delta; - uint32_t request; -}; - struct context { unsigned int tid; unsigned int previous; @@ -46,36 +32,6 @@ static struct affinity a; static int size; static unsigned int barrier; -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - static void * test(void *c) { diff --git a/regressions/ck_fifo/validate/ck_fifo_spsc.c b/regressions/ck_fifo/validate/ck_fifo_spsc.c index 85c7be8..118a68d 100644 --- a/regressions/ck_fifo/validate/ck_fifo_spsc.c +++ b/regressions/ck_fifo/validate/ck_fifo_spsc.c @@ -5,26 +5,12 @@ #include -#ifdef __linux__ -#include -#include -#include -#include -#endif +#include "../../common.h" #ifndef ITERATIONS #define ITERATIONS 128 #endif -#ifndef CORES -#define CORES 8 -#endif - -struct affinity { - uint32_t delta; - uint32_t request; -}; - struct context { unsigned int tid; unsigned int previous; @@ -42,36 +28,6 @@ static struct affinity a; static int size; static unsigned int barrier; -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - static void * test(void *c) { diff --git a/regressions/ck_hp/validate/ck_hp_fifo.c b/regressions/ck_hp/validate/ck_hp_fifo.c index 249d9b4..5ebead5 100644 --- a/regressions/ck_hp/validate/ck_hp_fifo.c +++ b/regressions/ck_hp/validate/ck_hp_fifo.c @@ -2,29 +2,14 @@ #include #include #include - #include -#ifdef __linux__ -#include -#include -#include -#include -#endif +#include "../../common.h" #ifndef ITERATIONS #define ITERATIONS 128 #endif -#ifndef CORES -#define CORES 8 -#endif - -struct affinity { - uint32_t delta; - uint32_t request; -}; - struct context { unsigned int tid; unsigned int previous; @@ -45,36 +30,6 @@ static int size; static unsigned int barrier; static unsigned int e_barrier; -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - static void * test(void *c) { @@ -142,7 +97,6 @@ main(int argc, char *argv[]) exit(EXIT_FAILURE); } - a.request = 0; a.delta = atoi(argv[2]); nthr = atoi(argv[1]); diff --git a/regressions/ck_hp/validate/nbds_haz_test.c b/regressions/ck_hp/validate/nbds_haz_test.c index 5935f62..3098118 100644 --- a/regressions/ck_hp/validate/nbds_haz_test.c +++ b/regressions/ck_hp/validate/nbds_haz_test.c @@ -1,6 +1,5 @@ /* * Copyright 2010 Samy Al Bahra. - * Copyright 2010 Message Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,17 +42,6 @@ #include -#ifdef __linux__ -#include -#include -#include -#include -#endif - -#ifndef CORES -#define CORES 8 -#endif - #include #include #include @@ -61,6 +49,8 @@ #include #include +#include "../../common.h" + #define STACK_CONTAINER(T, M, N) CK_CC_CONTAINER(stack_entry_t, T, M, N) struct stack_entry { @@ -95,44 +85,8 @@ hp_stack_t stack = {NULL, NULL}; ck_hp_t stack_hp; STACK_CONTAINER(struct node, stack_entry, stack_container) - -struct affinity { - uint32_t delta; - uint32_t request; -}; - static struct affinity a; -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - /* * Stack producer operation safe for multiple unique producers and multiple consumers. */ diff --git a/regressions/ck_pr/benchmark/benchmark.h b/regressions/ck_pr/benchmark/benchmark.h index 447abb7..82a6517 100644 --- a/regressions/ck_pr/benchmark/benchmark.h +++ b/regressions/ck_pr/benchmark/benchmark.h @@ -11,29 +11,11 @@ #include -#ifdef __linux__ -#include -#include -#include -#include -#endif - -#ifndef CORES -#define CORES 8 -#endif +#include "../../common.h" /* 8! = 40320, evenly divide 1 .. 8 processor workload. */ #define WORKLOAD (40320 * 2056) -#ifndef ITERATE -#define ITERATE 65536 -#endif - -struct affinity { - uint32_t delta; - uint32_t request; -}; - struct block { unsigned int tid; }; @@ -45,80 +27,6 @@ static uint64_t nthr; static uint64_t object[2] CK_CC_CACHELINE; -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - -__attribute__((used)) static void -gen_lock(void) -{ -#ifdef LOCK_STATE - LOCK_STATE; -#endif - -#ifdef LOCK - LOCK; -#endif -} - -__attribute__((used)) static void -gen_unlock(void) -{ -#ifdef LOCK_STATE - LOCK_STATE; -#endif - -#ifdef UNLOCK - UNLOCK; -#endif -} - -static inline uint64_t -rdtsc(void) -{ - uint32_t eax = 0, edx; - - __asm__ __volatile__("cpuid;" - "rdtsc;" - : "+a" (eax), "=d" (edx) - : - : "%rcx", "%rbx", "memory"); - - __asm__ __volatile__("xorl %%eax, %%eax;" - "cpuid;" - : - : - : "%rax", "%rbx", "%rcx", "%rdx", "memory"); - - return (((uint64_t)edx << 32) | eax); -} - static void * fairness(void *null) { diff --git a/regressions/ck_ring/benchmark/latency.c b/regressions/ck_ring/benchmark/latency.c index f148903..eb3ed85 100644 --- a/regressions/ck_ring/benchmark/latency.c +++ b/regressions/ck_ring/benchmark/latency.c @@ -4,6 +4,8 @@ #include #include +#include "../../common.h" + #ifndef ITERATIONS #define ITERATIONS (128000) #endif @@ -15,30 +17,6 @@ struct entry { CK_RING(entry, entry_ring) static CK_RING_INSTANCE(entry_ring) ring; -static inline uint64_t -rdtsc(void) -{ -#if defined(__x86_64__) || defined(__x86__) - uint32_t eax = 0, edx; - - __asm__ __volatile__("cpuid;" - "rdtsc;" - : "+a" (eax), "=d" (edx) - : - : "%ecx", "%ebx", "memory"); - - __asm__ __volatile__("xorl %%eax, %%eax;" - "cpuid;" - : - : - : "%eax", "%ebx", "%ecx", "%edx", "memory"); - - return (((uint64_t)edx << 32) | eax); -#else - return 0; -#endif -} - int main(int argc, char *argv[]) { diff --git a/regressions/ck_ring/validate/ck_ring_spsc.c b/regressions/ck_ring/validate/ck_ring_spsc.c index 67fed98..54d6d9a 100644 --- a/regressions/ck_ring/validate/ck_ring_spsc.c +++ b/regressions/ck_ring/validate/ck_ring_spsc.c @@ -4,27 +4,12 @@ #include #include - -#ifdef __linux__ -#include -#include -#include -#include -#endif +#include "../../common.h" #ifndef ITERATIONS #define ITERATIONS 128 #endif -#ifndef CORES -#define CORES 8 -#endif - -struct affinity { - uint32_t delta; - uint32_t request; -}; - struct context { unsigned int tid; unsigned int previous; @@ -42,36 +27,6 @@ static struct affinity a; static int size; static volatile int barrier; -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - static void * test(void *c) { diff --git a/regressions/ck_ring/validate/ck_ring_spsc_template.c b/regressions/ck_ring/validate/ck_ring_spsc_template.c index 6ffcccb..fcaa8b9 100644 --- a/regressions/ck_ring/validate/ck_ring_spsc_template.c +++ b/regressions/ck_ring/validate/ck_ring_spsc_template.c @@ -4,27 +4,12 @@ #include #include - -#ifdef __linux__ -#include -#include -#include -#include -#endif +#include "../../common.h" #ifndef ITERATIONS #define ITERATIONS 128 #endif -#ifndef CORES -#define CORES 8 -#endif - -struct affinity { - uint32_t delta; - uint32_t request; -}; - struct context { unsigned int tid; unsigned int previous; @@ -43,36 +28,6 @@ static struct affinity a; static int size; static volatile int barrier; -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - static void * test(void *c) { diff --git a/regressions/ck_sequence/validate/ck_sequence.c b/regressions/ck_sequence/validate/ck_sequence.c index db86850..a689839 100644 --- a/regressions/ck_sequence/validate/ck_sequence.c +++ b/regressions/ck_sequence/validate/ck_sequence.c @@ -5,26 +5,12 @@ #include #include -#ifdef __linux__ -#include -#include -#include -#include -#endif - -#ifndef CORES -#define CORES 8 -#endif +#include "../../common.h" #ifndef STEPS #define STEPS 1000000 #endif -struct affinity { - uint32_t delta; - uint32_t request; -}; - struct example { uint64_t a; uint64_t b; @@ -36,37 +22,6 @@ static ck_sequence_t seqlock CK_CC_CACHELINE = CK_SEQUENCE_INITIALIZER; static unsigned int barrier; static struct affinity affinerator; -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - - static void * consumer(void *unused) { diff --git a/regressions/ck_spinlock/benchmark/Makefile b/regressions/ck_spinlock/benchmark/Makefile index 824376d..c040bea 100644 --- a/regressions/ck_spinlock/benchmark/Makefile +++ b/regressions/ck_spinlock/benchmark/Makefile @@ -1,41 +1,73 @@ .PHONY: all clean -all: ck_ticket ck_mcs ck_dec ck_cas ck_fas ck_clh linux_spinlock \ - ck_ticket_pb ck_anderson test_and_set +OBJECTS=ck_ticket.THROUGHPUT ck_ticket.LATENCY \ + ck_mcs.THROUGHPUT ck_mcs.LATENCY \ + ck_dec.THROUGHPUT ck_dec.LATENCY \ + ck_cas.THROUGHPUT ck_cas.LATENCY \ + ck_fas.THROUGHPUT ck_fas.LATENCY \ + ck_clh.THROUGHPUT ck_clh.LATENCY \ + linux_spinlock.THROUGHPUT linux_spinlock.LATENCY \ + ck_ticket_pb.THROUGHPUT ck_ticket_pb.LATENCY \ + ck_anderson.THROUGHPUT ck_anderson.LATENCY -linux_spinlock: linux_spinlock.c - $(CC) $(CFLAGS) -o linux_spinlock linux_spinlock.c +all: $(OBJECTS) -ck_ticket_pb: ck_ticket_pb.c - $(CC) $(CFLAGS) -o ck_ticket_pb ck_ticket_pb.c +ck_ticket.THROUGHPUT: ck_ticket.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_ticket.THROUGHPUT ck_ticket.c -ck_clh: ck_clh.c - $(CC) $(CFLAGS) -o ck_clh ck_clh.c +ck_ticket.LATENCY: ck_ticket.c + $(CC) -DLATENCY $(CFLAGS) -o ck_ticket.LATENCY ck_ticket.c -test_and_set: test_and_set.c - $(CC) $(CFLAGS) -o test_and_set test_and_set.c +ck_mcs.THROUGHPUT: ck_mcs.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_mcs.THROUGHPUT ck_mcs.c -ck_anderson: ck_anderson.c - $(CC) $(CFLAGS) -o ck_anderson ck_anderson.c +ck_mcs.LATENCY: ck_mcs.c + $(CC) -DLATENCY $(CFLAGS) -o ck_mcs.LATENCY ck_mcs.c -ck_fas: ck_fas.c - $(CC) $(CFLAGS) -o ck_fas ck_fas.c +ck_dec.THROUGHPUT: ck_dec.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_dec.THROUGHPUT ck_dec.c -ck_ticket: ck_ticket.c - $(CC) $(CFLAGS) -o ck_ticket ck_ticket.c +ck_dec.LATENCY: ck_dec.c + $(CC) -DLATENCY $(CFLAGS) -o ck_dec.LATENCY ck_dec.c -ck_cas: ck_cas.c - $(CC) $(CFLAGS) -o ck_cas ck_cas.c +ck_cas.THROUGHPUT: ck_cas.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_cas.THROUGHPUT ck_cas.c -ck_mcs: ck_mcs.c - $(CC) $(CFLAGS) -o ck_mcs ck_mcs.c +ck_cas.LATENCY: ck_cas.c + $(CC) -DLATENCY $(CFLAGS) -o ck_cas.LATENCY ck_cas.c -ck_dec: ck_dec.c - $(CC) $(CFLAGS) -o ck_dec ck_dec.c +ck_fas.THROUGHPUT: ck_fas.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_fas.THROUGHPUT ck_fas.c + +ck_fas.LATENCY: ck_fas.c + $(CC) -DLATENCY $(CFLAGS) -o ck_fas.LATENCY ck_fas.c + +ck_clh.THROUGHPUT: ck_clh.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_clh.THROUGHPUT ck_clh.c + +ck_clh.LATENCY: ck_clh.c + $(CC) -DLATENCY $(CFLAGS) -o ck_clh.LATENCY ck_clh.c + +linux_spinlock.THROUGHPUT: linux_spinlock.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o linux_spinlock.THROUGHPUT linux_spinlock.c + +linux_spinlock.LATENCY: linux_spinlock.c + $(CC) -DLATENCY $(CFLAGS) -o linux_spinlock.LATENCY linux_spinlock.c + +ck_ticket_pb.THROUGHPUT: ck_ticket_pb.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_ticket_pb.THROUGHPUT ck_ticket_pb.c + +ck_ticket_pb.LATENCY: ck_ticket_pb.c + $(CC) -DLATENCY $(CFLAGS) -o ck_ticket_pb.LATENCY ck_ticket_pb.c + +ck_anderson.THROUGHPUT: ck_anderson.c + $(CC) -DTHROUGHPUT $(CFLAGS) -o ck_anderson.THROUGHPUT ck_anderson.c + +ck_anderson.LATENCY: ck_anderson.c + $(CC) -DLATENCY $(CFLAGS) -o ck_anderson.LATENCY ck_anderson.c clean: - rm -rf ck_ticket ck_mcs ck_dec ck_cas ck_fas ck_clh linux_spinlock ck_ticket_pb \ - ck_anderson test_and_set *.dSYM + rm -rf *.dSYM $(OBJECTS) include ../../../build/regressions.build CFLAGS+=-D_GNU_SOURCE -lpthread -lm diff --git a/regressions/ck_spinlock/benchmark/benchmark.sh b/regressions/ck_spinlock/benchmark/benchmark.sh deleted file mode 100755 index 9b2267c..0000000 --- a/regressions/ck_spinlock/benchmark/benchmark.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/sh - -SPACE=" " -SYSTEM=`uname -s` - -case "$SYSTEM" in - "FreeBSD") - CORES=`sysctl -n hw.ncpu` - ;; - "Darwin") - CORES=`sysctl -n hw.activecpu` - ;; - "Linux") - CORES=`grep processor /proc/cpuinfo|wc -l` - ;; -esac - -echo "Detected $CORES cores." -echo - -for k in ck_clh ck_anderson ck_cas ck_dec ck_fas ck_mcs ck_ticket ck_ticket_pb; do - echo "===[ Beginning $k benchmarks..." - echo "# Cores $SPACE Total $SPACE Average $SPACE Deviation" > ${k}.data - - for j in `seq 1 $CORES`; do - printf " Beginning $j cores..." - printf " $j $SPACE " >> ${k}.data; - ./$k $j 1 0 2> /dev/null | awk '/deviation/ {printf("%16f ",$4)} /average/ {printf("%16.4d ",$4)} /total/ {printf("%16d ",$4)}' >> ${k}.data - echo >> ${k}.data - printf "done\n" - done -done diff --git a/regressions/ck_spinlock/benchmark/ck_anderson.c b/regressions/ck_spinlock/benchmark/ck_anderson.c index fa7ffe1..2f1aecd 100644 --- a/regressions/ck_spinlock/benchmark/ck_anderson.c +++ b/regressions/ck_spinlock/benchmark/ck_anderson.c @@ -1,2 +1,8 @@ #include "../ck_anderson.h" -#include "benchmark.h" + +#ifdef THROUGHPUT +#include "throughput.h" +#elif defined(LATENCY) +#include "latency.h" +#endif + diff --git a/regressions/ck_spinlock/benchmark/ck_cas.c b/regressions/ck_spinlock/benchmark/ck_cas.c index c27a877..96bd9d8 100644 --- a/regressions/ck_spinlock/benchmark/ck_cas.c +++ b/regressions/ck_spinlock/benchmark/ck_cas.c @@ -1,2 +1,8 @@ #include "../ck_cas.h" -#include "benchmark.h" + +#ifdef THROUGHPUT +#include "throughput.h" +#elif defined(LATENCY) +#include "latency.h" +#endif + diff --git a/regressions/ck_spinlock/benchmark/ck_clh.c b/regressions/ck_spinlock/benchmark/ck_clh.c index 15e0fd8..da71d5e 100644 --- a/regressions/ck_spinlock/benchmark/ck_clh.c +++ b/regressions/ck_spinlock/benchmark/ck_clh.c @@ -1,2 +1,7 @@ #include "../ck_clh.h" -#include "benchmark.h" + +#ifdef THROUGHPUT +#include "throughput.h" +#elif defined(LATENCY) +#include "latency.h" +#endif diff --git a/regressions/ck_spinlock/benchmark/ck_dec.c b/regressions/ck_spinlock/benchmark/ck_dec.c index 871ed88..115c116 100644 --- a/regressions/ck_spinlock/benchmark/ck_dec.c +++ b/regressions/ck_spinlock/benchmark/ck_dec.c @@ -1,2 +1,7 @@ #include "../ck_dec.h" -#include "benchmark.h" + +#ifdef THROUGHPUT +#include "throughput.h" +#elif defined(LATENCY) +#include "latency.h" +#endif diff --git a/regressions/ck_spinlock/benchmark/ck_fas.c b/regressions/ck_spinlock/benchmark/ck_fas.c index c285368..c76c964 100644 --- a/regressions/ck_spinlock/benchmark/ck_fas.c +++ b/regressions/ck_spinlock/benchmark/ck_fas.c @@ -1,2 +1,7 @@ #include "../ck_fas.h" -#include "benchmark.h" + +#ifdef THROUGHPUT +#include "throughput.h" +#elif defined(LATENCY) +#include "latency.h" +#endif diff --git a/regressions/ck_spinlock/benchmark/ck_mcs.c b/regressions/ck_spinlock/benchmark/ck_mcs.c index 658a049..c2e95de 100644 --- a/regressions/ck_spinlock/benchmark/ck_mcs.c +++ b/regressions/ck_spinlock/benchmark/ck_mcs.c @@ -1,2 +1,7 @@ #include "../ck_mcs.h" -#include "benchmark.h" + +#ifdef THROUGHPUT +#include "throughput.h" +#elif defined(LATENCY) +#include "latency.h" +#endif diff --git a/regressions/ck_spinlock/benchmark/ck_ticket.c b/regressions/ck_spinlock/benchmark/ck_ticket.c index 48482f5..09c9193 100644 --- a/regressions/ck_spinlock/benchmark/ck_ticket.c +++ b/regressions/ck_spinlock/benchmark/ck_ticket.c @@ -1,2 +1,8 @@ #include "../ck_ticket.h" -#include "benchmark.h" + +#ifdef THROUGHPUT +#include "throughput.h" +#elif defined(LATENCY) +#include "latency.h" +#endif + diff --git a/regressions/ck_spinlock/benchmark/ck_ticket_pb.c b/regressions/ck_spinlock/benchmark/ck_ticket_pb.c index 6452b50..6122d6a 100644 --- a/regressions/ck_spinlock/benchmark/ck_ticket_pb.c +++ b/regressions/ck_spinlock/benchmark/ck_ticket_pb.c @@ -1,2 +1,7 @@ #include "../ck_ticket_pb.h" -#include "benchmark.h" + +#ifdef THROUGHPUT +#include "throughput.h" +#elif defined(LATENCY) +#include "latency.h" +#endif diff --git a/regressions/ck_spinlock/benchmark/latency.h b/regressions/ck_spinlock/benchmark/latency.h new file mode 100644 index 0000000..06c1d60 --- /dev/null +++ b/regressions/ck_spinlock/benchmark/latency.h @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include + +#include "../../common.h" + +#ifndef STEPS +#define STEPS 30000000 +#endif + +LOCK_DEFINE; + +int +main(void) +{ + CK_CC_UNUSED unsigned int nthr = 1; + + #ifdef LOCK_INIT + LOCK_INIT; + #endif + + #ifdef LOCK_STATE + LOCK_STATE; + #endif + + uint64_t s_b, e_b, i; + + s_b = rdtsc(); + for (i = 0; i < STEPS; ++i) { + #ifdef LOCK + LOCK; + UNLOCK; + LOCK; + UNLOCK; + LOCK; + UNLOCK; + LOCK; + UNLOCK; + #endif + } + e_b = rdtsc(); + printf("%15" PRIu64 "\n", (e_b - s_b) / 4 / STEPS); + + return (0); +} + diff --git a/regressions/ck_spinlock/benchmark/linux_spinlock.c b/regressions/ck_spinlock/benchmark/linux_spinlock.c index 5f1d09a..954019b 100644 --- a/regressions/ck_spinlock/benchmark/linux_spinlock.c +++ b/regressions/ck_spinlock/benchmark/linux_spinlock.c @@ -1,2 +1,7 @@ #include "../linux_spinlock.h" -#include "benchmark.h" + +#ifdef THROUGHPUT +#include "throughput.h" +#elif defined(LATENCY) +#include "latency.h" +#endif diff --git a/regressions/ck_spinlock/benchmark/test_and_set.c b/regressions/ck_spinlock/benchmark/test_and_set.c deleted file mode 100644 index 2183e64..0000000 --- a/regressions/ck_spinlock/benchmark/test_and_set.c +++ /dev/null @@ -1,2 +0,0 @@ -#include "../test_and_set.h" -#include "benchmark.h" diff --git a/regressions/ck_spinlock/benchmark/benchmark.h b/regressions/ck_spinlock/benchmark/throughput.h similarity index 61% rename from regressions/ck_spinlock/benchmark/benchmark.h rename to regressions/ck_spinlock/benchmark/throughput.h index 478e1a1..4bb2a8c 100644 --- a/regressions/ck_spinlock/benchmark/benchmark.h +++ b/regressions/ck_spinlock/benchmark/throughput.h @@ -12,16 +12,7 @@ #include #include -#ifdef __linux__ -#include -#include -#include -#include -#endif - -#ifndef CORES -#define CORES 8 -#endif +#include "../../common.h" /* 8! = 40320, evenly divide 1 .. 8 processor workload. */ #define WORKLOAD (40320 * 2056) @@ -30,11 +21,6 @@ #define ITERATE 65536 #endif -struct affinity { - uint32_t delta; - uint32_t request; -}; - struct block { unsigned int tid; }; @@ -48,37 +34,7 @@ int critical __attribute__((aligned(64))); LOCK_DEFINE; -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - -__attribute__((used)) static void +CK_CC_USED static void gen_lock(void) { #ifdef LOCK_STATE @@ -90,7 +46,7 @@ gen_lock(void) #endif } -__attribute__((used)) static void +CK_CC_USED static void gen_unlock(void) { #ifdef LOCK_STATE @@ -218,66 +174,6 @@ main(int argc, char *argv[]) printf("# average : %15" PRIu64 "\n", v); printf("# deviation : %.2f (%.2f%%)\n\n", sqrt(d / nthr), (sqrt(d / nthr) / v) * 100.00); -#if 0 - fprintf(stderr, "Creating threads (latency)..."); - for (i = 0; i < nthr; i++) { - if (pthread_create(&threads[i], NULL, latency, context + i)) { - fprintf(stderr, "ERROR: Could not create thread %d\n", i); - exit(EXIT_FAILURE); - } - } - fprintf(stderr, "done\n"); -#endif - -#if 0 - ck_pr_store_uint(&ready, 1); - sleep(5); - - fprintf(stderr, "Waiting for threads to finish latency test..."); - for (i = 0; i < nthr; i++) - pthread_join(threads[i], NULL); - fprintf(stderr, "done\n\n"); - - ck_pr_store_uint(&ready, 0); - v = 0; - - for (i = 0; i < nthr; i++) { - printf("# %2d LatencyT: %15" PRIu64 "\n", i, count[i]); - v += count[i]; - } - - printf("# Latency: %15" PRIu64 "\n\n", v / nthr); - - fprintf(stderr, "Creating threads (workload)..."); - for (i = 0; i < nthr; i++) { - if (pthread_create(&threads[i], NULL, workload, context + i)) { - fprintf(stderr, "ERROR: Could not create thread %" PRIu64 "\n", i); - exit(EXIT_FAILURE); - } - } - fprintf(stderr, "done\n"); - - ck_pr_store_uint(&ready, 1); - { - struct timeval tim, ti; - double t1, t2; - - fprintf(stderr, "Waiting for threads to finish workload test..."); - - gettimeofday(&tim, NULL); - - for (i = 0; i < nthr; i++) - pthread_join(threads[i], NULL); - - gettimeofday(&ti, NULL); - fprintf(stderr, "done\n\n"); - t1 = tim.tv_sec+(tim.tv_usec/1000000.0); - t2 = ti.tv_sec+(ti.tv_usec/1000000.0); - printf("# workload : %.6lf seconds elapsed\n", t2 - t1); - } - ck_pr_store_uint(&ready, 0); -#endif - return (0); } diff --git a/regressions/ck_spinlock/linux_spinlock.h b/regressions/ck_spinlock/linux_spinlock.h index 06a6e47..735efd3 100644 --- a/regressions/ck_spinlock/linux_spinlock.h +++ b/regressions/ck_spinlock/linux_spinlock.h @@ -1,5 +1,6 @@ +#include -static inline void +CK_CC_INLINE static void spin_lock(volatile unsigned int *lock) { #ifdef __x86_64__ @@ -20,7 +21,7 @@ spin_lock(volatile unsigned int *lock) return; } -static inline void +CK_CC_INLINE static void spin_unlock(volatile unsigned int *lock) { #ifdef __x86_64__ diff --git a/regressions/ck_spinlock/test_and_set.h b/regressions/ck_spinlock/test_and_set.h deleted file mode 100644 index 98e63ab..0000000 --- a/regressions/ck_spinlock/test_and_set.h +++ /dev/null @@ -1,34 +0,0 @@ -static inline void -spin_lock(volatile unsigned int *lock) -{ -#if defined(__x86__) || defined(__x86_64__) - asm volatile( - "\n1:\t" - "pause\n" - "lock decl %0\n\t" - "jns 2f\n" - "jmp 1b\n" - "2:\t" : "=m" (*lock) : : "memory"); -#else - *lock = 0; -#endif - - return; -} - -static inline void -spin_unlock(volatile unsigned int *lock) -{ -#if defined(__x86__) || defined(__x86_64__) - asm volatile("movl $1,%0" :"=m" (*lock) :: "memory"); -#else - *lock = 1; -#endif - return; -} - -#define LOCK_NAME "test_and_set" -#define LOCK_DEFINE volatile unsigned int lock = 1 -#define LOCK spin_lock(&lock) -#define UNLOCK spin_unlock(&lock) - diff --git a/regressions/ck_spinlock/validate/Makefile b/regressions/ck_spinlock/validate/Makefile index 610cc3d..ed2848c 100644 --- a/regressions/ck_spinlock/validate/Makefile +++ b/regressions/ck_spinlock/validate/Makefile @@ -1,7 +1,7 @@ .PHONY: clean all: ck_ticket ck_mcs ck_dec ck_cas ck_fas ck_clh linux_spinlock \ - ck_ticket_pb ck_anderson test_and_set + ck_ticket_pb ck_anderson linux_spinlock: linux_spinlock.c $(CC) $(CFLAGS) -o linux_spinlock linux_spinlock.c @@ -12,9 +12,6 @@ ck_ticket_pb: ck_ticket_pb.c ck_clh: ck_clh.c $(CC) $(CFLAGS) -o ck_clh ck_clh.c -test_and_set: test_and_set.c - $(CC) $(CFLAGS) -o test_and_set test_and_set.c - ck_anderson: ck_anderson.c $(CC) $(CFLAGS) -o ck_anderson ck_anderson.c diff --git a/regressions/ck_spinlock/validate/test_and_set.c b/regressions/ck_spinlock/validate/test_and_set.c deleted file mode 100644 index 52399ee..0000000 --- a/regressions/ck_spinlock/validate/test_and_set.c +++ /dev/null @@ -1,2 +0,0 @@ -#include "../test_and_set.h" -#include "validate.h" diff --git a/regressions/ck_spinlock/validate/validate.h b/regressions/ck_spinlock/validate/validate.h index 49a7c8a..18b8bd7 100644 --- a/regressions/ck_spinlock/validate/validate.h +++ b/regressions/ck_spinlock/validate/validate.h @@ -9,29 +9,16 @@ #include #include +#include #include #include -#ifdef __linux__ -#include -#include -#include -#include -#endif - -#ifndef CORES -#define CORES 8 -#endif +#include "../../common.h" #ifndef ITERATE #define ITERATE 1000000 #endif -struct affinity { - uint32_t delta; - uint32_t request; -}; - struct block { unsigned int tid; }; @@ -42,38 +29,8 @@ static uint64_t nthr; LOCK_DEFINE; -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - static void * -thread(void *null __attribute__((unused))) +thread(void *null CK_CC_UNUSED) { #ifdef LOCK_STATE LOCK_STATE; diff --git a/regressions/ck_stack/benchmark/Makefile b/regressions/ck_stack/benchmark/Makefile new file mode 100644 index 0000000..635bd4b --- /dev/null +++ b/regressions/ck_stack/benchmark/Makefile @@ -0,0 +1,14 @@ +.PHONY: clean distribution + +OBJECTS=latency + +all: $(OBJECTS) + +latency: latency.c + $(CC) $(CFLAGS) -o latency latency.c + +clean: + rm -rf *~ *.o *.dSYM $(OBJECTS) + +include ../../../build/regressions.build +CFLAGS+=-lpthread diff --git a/regressions/ck_stack/benchmark/latency.c b/regressions/ck_stack/benchmark/latency.c new file mode 100644 index 0000000..ddbd195 --- /dev/null +++ b/regressions/ck_stack/benchmark/latency.c @@ -0,0 +1,125 @@ +#include +#include +#include +#include +#include + +#include "../../common.h" + +#ifndef ENTRIES +#define ENTRIES 1024 +#endif + +#ifndef STEPS +#define STEPS 4000 +#endif + +static ck_stack_t stack; + +int +main(void) +{ + ck_stack_entry_t entry[4096]; + ck_spinlock_fas_t mutex = CK_SPINLOCK_FAS_INITIALIZER; + volatile ck_stack_entry_t * volatile r; + uint64_t s, e, a; + unsigned int i; + unsigned int j; + + a = 0; + for (i = 0; i < STEPS; i++) { + ck_stack_init(&stack); + + s = rdtsc(); + for (j = 0; j < sizeof(entry) / sizeof(*entry); j++) { + ck_spinlock_fas_lock(&mutex); + ck_stack_push_spnc(&stack, entry + j); + ck_spinlock_fas_unlock(&mutex); + } + e = rdtsc(); + + a += e - s; + } + printf(" push_spinlock: %16" PRIu64 "\n", a / STEPS / (sizeof(entry) / sizeof(*entry))); + +#ifdef CK_F_STACK_PUSH_UPMC + a = 0; + for (i = 0; i < STEPS; i++) { + ck_stack_init(&stack); + + s = rdtsc(); + for (j = 0; j < sizeof(entry) / sizeof(*entry); j++) + ck_stack_push_upmc(&stack, entry + j); + e = rdtsc(); + + a += e - s; + } + printf("ck_stack_push_upmc: %16" PRIu64 "\n", a / STEPS / (sizeof(entry) / sizeof(*entry))); +#endif /* CK_F_STACK_PUSH_UPMC */ + +#ifdef CK_F_STACK_PUSH_MPMC + a = 0; + for (i = 0; i < STEPS; i++) { + ck_stack_init(&stack); + + s = rdtsc(); + for (j = 0; j < sizeof(entry) / sizeof(*entry); j++) + ck_stack_push_mpmc(&stack, entry + j); + e = rdtsc(); + + a += e - s; + } + printf("ck_stack_push_mpmc: %16" PRIu64 "\n", a / STEPS / (sizeof(entry) / sizeof(*entry))); +#endif /* CK_F_STACK_PUSH_MPMC */ + +#ifdef CK_F_STACK_PUSH_MPNC + a = 0; + for (i = 0; i < STEPS; i++) { + ck_stack_init(&stack); + + s = rdtsc(); + for (j = 0; j < sizeof(entry) / sizeof(*entry); j++) + ck_stack_push_mpnc(&stack, entry + j); + e = rdtsc(); + + a += e - s; + } + printf("ck_stack_push_mpnc: %16" PRIu64 "\n", a / STEPS / (sizeof(entry) / sizeof(*entry))); +#endif /* CK_F_STACK_PUSH_MPNC */ + +#if defined(CK_F_STACK_PUSH_UPMC) && defined(CK_F_STACK_POP_UPMC) + a = 0; + for (i = 0; i < STEPS; i++) { + ck_stack_init(&stack); + + for (j = 0; j < sizeof(entry) / sizeof(*entry); j++) + ck_stack_push_upmc(&stack, entry + j); + + s = rdtsc(); + for (j = 0; j < sizeof(entry) / sizeof(*entry); j++) + r = ck_stack_pop_upmc(&stack); + e = rdtsc(); + a += e - s; + } + printf(" ck_stack_pop_upmc: %16" PRIu64 "\n", a / STEPS / (sizeof(entry) / sizeof(*entry))); +#endif /* CK_F_STACK_PUSH_UPMC && CK_F_STACK_POP_UPMC */ + +#if defined(CK_F_STACK_POP_MPMC) && defined(CK_F_STACK_PUSH_MPMC) + a = 0; + for (i = 0; i < STEPS; i++) { + ck_stack_init(&stack); + + for (j = 0; j < sizeof(entry) / sizeof(*entry); j++) + ck_stack_push_mpmc(&stack, entry + j); + + s = rdtsc(); + for (j = 0; j < sizeof(entry) / sizeof(*entry); j++) + r = ck_stack_pop_mpmc(&stack); + e = rdtsc(); + a += e - s; + } + printf(" ck_stack_pop_mpmc: %16" PRIu64 "\n", a / STEPS / (sizeof(entry) / sizeof(*entry))); +#endif + + return 0; +} diff --git a/regressions/ck_stack/validate/benchmark.sh b/regressions/ck_stack/validate/benchmark.sh deleted file mode 100755 index ada8597..0000000 --- a/regressions/ck_stack/validate/benchmark.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/sh - -SPACE=" " -SYSTEM=`uname -s` - -case "$SYSTEM" in - "Darwin") - CORES=`sysctl -n hw.activecpu` - ;; - "Linux") - CORES=`grep processor /proc/cpuinfo|wc -l` - ;; -esac - -echo "Detected $CORES cores." -echo - -for k in push pair pop; do - echo "===[ Beginning $k benchmarks..." - printf "# Cores $SPACE" > ${k}.data - - for i in *_${k}; do - printf "$i $SPACE" >> ${k}.data - done - - echo >> ${k}.data - - for j in `seq 1 $CORES`; do - echo "===[ Beginning $j cores..." - - printf " $j $SPACE " >> ${k}.data; - for i in *_${k}; do - printf " Executing $i..." - ./$i $j 1 0 | awk '{printf("%.8f ",$2)}' >> ${k}.data - printf "done\n" - done - echo >> ${k}.data - done -done diff --git a/regressions/ck_stack/validate/pair.c b/regressions/ck_stack/validate/pair.c index 2fd44ac..896ad74 100644 --- a/regressions/ck_stack/validate/pair.c +++ b/regressions/ck_stack/validate/pair.c @@ -14,19 +14,7 @@ #include #include -#ifdef __linux__ -#include -#include -#include -#endif - -#ifndef CORES -#define CORES 8 -#endif - -#ifndef CACHE_LINE_SIZE -#define CACHE_LINE_SIZE 64 -#endif +#include "../../common.h" #ifndef ITEMS #define ITEMS (5765760) @@ -34,11 +22,6 @@ #define TVTOD(tv) ((tv).tv_sec+((tv).tv_usec / (double)1000000)) -struct affinity { - uint32_t delta; - uint32_t request; -}; - struct entry { int value; #if defined(SPINLOCK) || defined(PTHREADS) @@ -72,37 +55,6 @@ static unsigned long long nthr; static volatile unsigned int barrier = 0; static unsigned int critical; -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - static void * stack_thread(void *buffer) { diff --git a/regressions/ck_stack/validate/pop.c b/regressions/ck_stack/validate/pop.c index 5f5d7ae..1a0cbc7 100644 --- a/regressions/ck_stack/validate/pop.c +++ b/regressions/ck_stack/validate/pop.c @@ -14,19 +14,7 @@ #include #include -#ifdef __linux__ -#include -#include -#include -#endif - -#ifndef CORES -#define CORES 8 -#endif - -#ifndef CACHE_LINE_SIZE -#define CACHE_LINE_SIZE 64 -#endif +#include "../../common.h" #ifndef ITEMS #define ITEMS (5765760 * 2) @@ -34,11 +22,6 @@ #define TVTOD(tv) ((tv).tv_sec+((tv).tv_usec / (double)1000000)) -struct affinity { - uint32_t delta; - uint32_t request; -}; - struct entry { int value; #ifdef SPINLOCK @@ -62,42 +45,11 @@ static ck_stack_t stack CK_CC_CACHELINE; CK_STACK_CONTAINER(struct entry, next, getvalue) #endif -static struct affinity affinerator; +static struct affinity affinerator = AFFINITY_INITIALIZER; static unsigned long long nthr; static volatile unsigned int barrier = 0; static unsigned int critical; -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - static void * stack_thread(void *unused CK_CC_UNUSED) { @@ -245,7 +197,6 @@ main(int argc, char *argv[]) srand(getpid()); - affinerator.request = 0; affinerator.delta = d; n = ITEMS / nthr; diff --git a/regressions/ck_stack/validate/push.c b/regressions/ck_stack/validate/push.c index 734617f..3f36d5c 100644 --- a/regressions/ck_stack/validate/push.c +++ b/regressions/ck_stack/validate/push.c @@ -12,19 +12,7 @@ #include #include -#ifdef __linux__ -#include -#include -#include -#endif - -#ifndef CORES -#define CORES 8 -#endif - -#ifndef CACHE_LINE_SIZE -#define CACHE_LINE_SIZE 64 -#endif +#include "../../common.h" #ifndef ITEMS #define ITEMS (5765760 * 2) @@ -32,11 +20,6 @@ #define TVTOD(tv) ((tv).tv_sec+((tv).tv_usec / (double)1000000)) -struct affinity { - uint32_t delta; - uint32_t request; -}; - struct entry { int value; #ifdef SPINLOCK @@ -54,7 +37,7 @@ static ck_stack_t stack CK_CC_CACHELINE; CK_STACK_CONTAINER(struct entry, next, getvalue) -static struct affinity affinerator; +static struct affinity affinerator = AFFINITY_INITIALIZER; static unsigned long long nthr; static volatile unsigned int barrier = 0; static unsigned int critical; @@ -73,37 +56,6 @@ pthread_mutex_t stack_spinlock = PTHREAD_MUTEX_INITIALIZER; #define UNLOCK pthread_mutex_unlock #endif -#ifdef __linux__ -#ifndef gettid -static pid_t -gettid(void) -{ - return syscall(__NR_gettid); -} -#endif - -static int -aff_iterate(struct affinity *acb) -{ - cpu_set_t s; - int c; - - c = ck_pr_faa_32(&acb->request, acb->delta); - - CPU_ZERO(&s); - CPU_SET(c % CORES, &s); - - return sched_setaffinity(gettid(), sizeof(s), &s); -} -#else -static int -aff_iterate(struct affinity *acb) -{ - acb = NULL; - return (0); -} -#endif - static void * stack_thread(void *buffer) { diff --git a/regressions/common.h b/regressions/common.h new file mode 100644 index 0000000..f87b3d8 --- /dev/null +++ b/regressions/common.h @@ -0,0 +1,104 @@ +/* + * Copyright 2011 Samy Al Bahra. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include + +#ifdef __linux__ +#include +#include +#include +#include +#endif + +#ifndef CORES +#define CORES 8 +#endif + +struct affinity { + unsigned int delta; + unsigned int request; +}; + +#define AFFINITY_INITIALIZER {0, 0} + +#ifdef __linux__ +#ifndef gettid +static pid_t +gettid(void) +{ + return syscall(__NR_gettid); +} +#endif /* gettid */ + +CK_CC_UNUSED static int +aff_iterate(struct affinity *acb) +{ + cpu_set_t s; + unsigned int c; + + c = ck_pr_faa_uint(&acb->request, acb->delta); + CPU_ZERO(&s); + CPU_SET(c % CORES, &s); + + return sched_setaffinity(gettid(), sizeof(s), &s); +} +#else +CK_CC_UNUSED static int +aff_iterate(struct affinity *acb CK_CC_UNUSED) +{ + + return (0); +} +#endif + +CK_CC_INLINE static uint64_t +rdtsc(void) +{ +#if defined(__x86_64__) + uint32_t eax = 0, edx; + + __asm__ __volatile__("cpuid;" + "rdtsc;" + : "+a" (eax), "=d" (edx) + : + : "%ecx", "%ebx", "memory"); + + __asm__ __volatile__("xorl %%eax, %%eax;" + "cpuid;" + : + : + : "%eax", "%ebx", "%ecx", "%edx", "memory"); + + return (((uint64_t)edx << 32) | eax); +#elif defined(__sparcv9__) + uint64_t r; + + __asm__ __volatile__("rd %%tick, %0" : "=r" (r) :: "memory"); + return r; +#endif +} +