Re: [Qemu-devel] [PATCHv4 4/9] bitops: use vector algorithm to optimize find_next_bit()

2013-03-25 Thread Orit Wasserman
On 03/22/2013 02:46 PM, Peter Lieven wrote:
 this patch adds the usage of buffer_find_nonzero_offset()
 to skip large areas of zeroes.
 
 compared to loop unrolling presented in an earlier
 patch this adds another 50% performance benefit for
 skipping large areas of zeroes. loop unrolling alone
 added close to 100% speedup.
 
 Signed-off-by: Peter Lieven <p...@kamp.de>
 Reviewed-by: Eric Blake <ebl...@redhat.com>
 ---
  util/bitops.c |   24 +++++++++++++++++++++---
  1 file changed, 21 insertions(+), 3 deletions(-)
 
 diff --git a/util/bitops.c b/util/bitops.c
 index e72237a..9bb61ff 100644
 --- a/util/bitops.c
 +++ b/util/bitops.c
 @@ -42,10 +42,28 @@ unsigned long find_next_bit(const unsigned long *addr, 
 unsigned long size,
  size -= BITS_PER_LONG;
  result += BITS_PER_LONG;
  }
 -while (size & ~(BITS_PER_LONG-1)) {
 -if ((tmp = *(p++))) {
 -goto found_middle;
 +while (size >= BITS_PER_LONG) {
 +tmp = *p;
 +if (tmp) {
 + goto found_middle;
 +}
 +if (can_use_buffer_find_nonzero_offset(p, size / BITS_PER_BYTE)) {
 +size_t tmp2 =
 +buffer_find_nonzero_offset(p, size / BITS_PER_BYTE);
 +result += tmp2 * BITS_PER_BYTE;
 +size -= tmp2 * BITS_PER_BYTE;
 +p += tmp2 / sizeof(unsigned long);
 +if (!size) {
 +return result;
 +}
 +if (tmp2) {
 +tmp = *p;
 +if (tmp) {
 +goto found_middle;
 +}
 +}
  }
 +p++;
  result += BITS_PER_LONG;
  size -= BITS_PER_LONG;
  }
 
Reviewed-by: Orit Wasserman <owass...@redhat.com>



[Qemu-devel] [PATCHv4 4/9] bitops: use vector algorithm to optimize find_next_bit()

2013-03-22 Thread Peter Lieven
this patch adds the usage of buffer_find_nonzero_offset()
to skip large areas of zeroes.

compared to loop unrolling presented in an earlier
patch this adds another 50% performance benefit for
skipping large areas of zeroes. loop unrolling alone
added close to 100% speedup.

Signed-off-by: Peter Lieven <p...@kamp.de>
Reviewed-by: Eric Blake <ebl...@redhat.com>
---
 util/bitops.c |   24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/util/bitops.c b/util/bitops.c
index e72237a..9bb61ff 100644
--- a/util/bitops.c
+++ b/util/bitops.c
@@ -42,10 +42,28 @@ unsigned long find_next_bit(const unsigned long *addr, 
unsigned long size,
 size -= BITS_PER_LONG;
 result += BITS_PER_LONG;
 }
-while (size & ~(BITS_PER_LONG-1)) {
-if ((tmp = *(p++))) {
-goto found_middle;
+while (size >= BITS_PER_LONG) {
+tmp = *p;
+if (tmp) {
+ goto found_middle;
+}
+if (can_use_buffer_find_nonzero_offset(p, size / BITS_PER_BYTE)) {
+size_t tmp2 =
+buffer_find_nonzero_offset(p, size / BITS_PER_BYTE);
+result += tmp2 * BITS_PER_BYTE;
+size -= tmp2 * BITS_PER_BYTE;
+p += tmp2 / sizeof(unsigned long);
+if (!size) {
+return result;
+}
+if (tmp2) {
+tmp = *p;
+if (tmp) {
+goto found_middle;
+}
+}
 }
+p++;
 result += BITS_PER_LONG;
 size -= BITS_PER_LONG;
 }
-- 
1.7.9.5