Re: [PATCH v4 01/10] sched: Provide sparsemask, a reduced contention bitmap

2019-01-31 Thread Tim Chen
On 12/6/18 1:28 PM, Steve Sistare wrote:
> Provide struct sparsemask and functions to manipulate it.  A sparsemask is
> a sparse bitmap.  It reduces cache contention vs the usual bitmap when many
> threads concurrently set, clear, and visit elements, by reducing the number
> of significant bits per cacheline.  For each cacheline chunk of the mask,
> only the first K bits of the first word are used, and the remaining bits
> are ignored, where K is a creation time parameter.  Thus a sparsemask that
> can represent a set of N elements is approximately (N/K * CACHELINE) bytes
> in size.
> 
> This type is simpler and more efficient than the struct sbitmap used by
> block drivers.
> 
> Signed-off-by: Steve Sistare 

Steve,

We did a test of this patch set with an OLTP benchmark using Oracle database
on a 2-socket SKX platform (2 x 28 cores).  The patch set boosted
performance by 3.5%.

The percentage of CPU idle time dropped by 5%,
user time increased by 4%, and kernel time increased by 1%,
indicating better overall CPU utilization.

The performance looks encouraging.

Thanks.

Tim



[PATCH v4 01/10] sched: Provide sparsemask, a reduced contention bitmap

2018-12-06 Thread Steve Sistare
Provide struct sparsemask and functions to manipulate it.  A sparsemask is
a sparse bitmap.  It reduces cache contention vs the usual bitmap when many
threads concurrently set, clear, and visit elements, by reducing the number
of significant bits per cacheline.  For each cacheline chunk of the mask,
only the first K bits of the first word are used, and the remaining bits
are ignored, where K is a creation time parameter.  Thus a sparsemask that
can represent a set of N elements is approximately (N/K * CACHELINE) bytes
in size.

This type is simpler and more efficient than the struct sbitmap used by
block drivers.
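
As a rough sizing sketch, under assumed parameters rather than anything
mandated by the patch: with a 64-byte cacheline and K = 8 significant
bits per chunk (density = 3), a mask covering 512 elements needs 64
chunks, i.e. about 4 KB.  A standalone calculation:

#include <stdio.h>

#define CACHELINE_BYTES 64	/* assumed cacheline size */

/* Approximate footprint of a sparsemask: one cacheline per chunk. */
static unsigned long sparsemask_bytes(int nelems, int density)
{
	int bits_per_chunk = 1 << density;	/* K significant bits */
	int nchunks = (nelems + bits_per_chunk - 1) / bits_per_chunk;

	return (unsigned long)nchunks * CACHELINE_BYTES;
}

int main(void)
{
	/* 512 elements, density 3 (K = 8) -> 64 chunks -> 4096 bytes */
	printf("%lu bytes\n", sparsemask_bytes(512, 3));
	return 0;
}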

Signed-off-by: Steve Sistare 
---
 kernel/sched/sparsemask.h | 210 ++
 1 file changed, 210 insertions(+)
 create mode 100644 kernel/sched/sparsemask.h

diff --git a/kernel/sched/sparsemask.h b/kernel/sched/sparsemask.h
new file mode 100644
index 000..1194862
--- /dev/null
+++ b/kernel/sched/sparsemask.h
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sparsemask.h - sparse bitmap operations
+ *
+ * Copyright (c) 2018 Oracle Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __LINUX_SPARSEMASK_H
+#define __LINUX_SPARSEMASK_H
+
+#include 
+#include 
+#include 
+
+/*
+ * A sparsemask is a sparse bitmap.  It reduces cache contention vs the usual
+ * bitmap when many threads concurrently set, clear, and visit elements.  For
+ * each cacheline chunk of the mask, only the first K bits of the first word are
+ * used, and the remaining bits are ignored, where K is a creation time
+ * parameter.  Thus a sparsemask that can represent a set of N elements is
+ * approximately (N/K * CACHELINE) bytes in size.
+ *
+ * Clients pass and receive element numbers in the public API, and the
+ * implementation translates them to bit numbers to perform the bitmap
+ * operations.
+ */
+
+struct sparsemask_chunk {
+   unsigned long word; /* the significant bits */
+} ____cacheline_aligned_in_smp;
+
+struct sparsemask {
+   short nelems;   /* current number of elements */
+   short density;  /* store 2^density elements per chunk */
+   struct sparsemask_chunk chunks[0];  /* embedded array of chunks */
+};
+
+#define _SMASK_INDEX(density, elem)    ((elem) >> (density))
+#define _SMASK_BIT(density, elem)      ((elem) & ((1U << (density)) - 1U))
+#define SMASK_INDEX(mask, elem)        _SMASK_INDEX((mask)->density, elem)
+#define SMASK_BIT(mask, elem)          _SMASK_BIT((mask)->density, elem)
+#define SMASK_WORD(mask, elem) \
+   (&(mask)->chunks[SMASK_INDEX((mask), (elem))].word)
+
+/*
+ * sparsemask_next() - Return the next one bit in a bitmap, starting at a
+ * specified position and wrapping from the last bit to the first, up to but
+ * not including a specified origin.  This is a helper, so do not call it
+ * directly.
+ *
+ * @mask: Bitmap to search.
+ * @origin: Origin.
+ * @prev: Previous bit. Start search after this bit number.
+ *   If -1, start search at @origin.
+ *
+ * Return: the bit number, else mask->nelems if no bits are set in the range.
+ */
+static inline int
+sparsemask_next(const struct sparsemask *mask, int origin, int prev)
+{
+   int density = mask->density;
+   int bits_per_word = 1U << density;
+   const struct sparsemask_chunk *chunk;
+   int nelems = mask->nelems;
+   int next, bit, nbits;
+   unsigned long word;
+
+   /* Calculate number of bits to be searched. */
+   if (prev == -1) {
+   nbits = nelems;
+   next = origin;
+   } else if (prev < origin) {
+   nbits = origin - prev;
+   next = prev + 1;
+   } else {
+   nbits = nelems - prev + origin - 1;
+   next = prev + 1;
+   }
+
+   if (unlikely(next >= nelems))
+   return nelems;
+
+   /*
+* Fetch and adjust first word.  Clear word bits below @next, and round
+* @next down to @bits_per_word boundary because later ffs will add
+* those bits back.
+*/
+   chunk = &mask->chunks[_SMASK_INDEX(density, next)];
+   bit = _SMASK_BIT(density, next);
+   word = chunk->word & (~0UL << bit);
+   next -= bit;
+   nbits += bit;
+
+   while (!word) {
+   next += bits_per_word;
+   nbits -= bits_per_word;
+   if (nbits <= 0)
+   return nelems;
+
+   if (next >= nelems) {
+   chunk 
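
For reference, the element-to-bit translation performed by the
_SMASK_INDEX() and _SMASK_BIT() macros above can be exercised in a
small standalone sketch; the density value of 3 used here is an
arbitrary assumption for illustration, not a value required by the
patch.

#include <stdio.h>

#define DENSITY 3	/* assumed: 2^3 = 8 significant bits per chunk */

/* Mirrors _SMASK_INDEX(): which cacheline chunk holds the element. */
static int smask_index(int elem)
{
	return elem >> DENSITY;
}

/* Mirrors _SMASK_BIT(): bit position within that chunk's first word. */
static int smask_bit(int elem)
{
	return elem & ((1U << DENSITY) - 1U);
}

int main(void)
{
	int elems[] = { 0, 7, 8, 100 };
	unsigned int i;

	for (i = 0; i < sizeof(elems) / sizeof(elems[0]); i++)
		printf("elem %3d -> chunk %2d, bit %d\n",
		       elems[i], smask_index(elems[i]), smask_bit(elems[i]));
	return 0;
}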
